From e919c11f59ba1845a31b199d2518f89069671413 Mon Sep 17 00:00:00 2001 From: Ivan Koptiev Date: Mon, 7 Apr 2025 23:42:22 +0300 Subject: [PATCH 1/4] feat: first version --- .github/workflows/release.yaml | 43 + .gitignore | 2 + .goreleaser.yaml | 82 + Dockerfile | 5 + README.md | 184 +++ Taskfile.yaml | 37 + cmd/docs-scrapper/README.md | 4 + .../fireboltdocs/api_reference.md | 90 ++ .../api_reference_using_async_queries.md | 119 ++ .../api_reference_using_sync_queries.md | 88 + cmd/docs-scrapper/fireboltdocs/docs.go | 6 + cmd/docs-scrapper/fireboltdocs/guides.md | 15 + .../guides_developing_with_firebolt.md | 23 + ...loping_with_firebolt_connecting_with_go.md | 195 +++ ...ping_with_firebolt_connecting_with_jdbc.md | 183 +++ ...g_with_firebolt_connecting_with_net_sdk.md | 96 ++ ...ng_with_firebolt_connecting_with_nodejs.md | 132 ++ ...ng_with_firebolt_connecting_with_python.md | 7 + ...ith_firebolt_connecting_with_sqlalchemy.md | 11 + .../fireboltdocs/guides_exporting_data.md | 90 ++ .../fireboltdocs/guides_getting_started.md | 16 + ...ng_started_get_started_load_data_wizard.md | 48 + ...guides_getting_started_get_started_next.md | 28 + .../guides_getting_started_get_started_sql.md | 373 +++++ .../guides_integrations_airbyte.md | 94 ++ .../guides_integrations_airflow.md | 413 +++++ ...egrations_connecting_to_apache_superset.md | 105 ++ ...des_integrations_connecting_to_paradime.md | 82 + ...uides_integrations_connecting_to_preset.md | 66 + ...guides_integrations_connecting_with_dbt.md | 213 +++ .../guides_integrations_cube_js.md | 98 ++ .../guides_integrations_dbeaver.md | 59 + .../guides_integrations_estuary.md | 127 ++ .../guides_integrations_integrations.md | 16 + .../guides_integrations_metabase.md | 56 + .../guides_integrations_otel_exporter.md | 9 + .../guides_integrations_tableau.md | 79 + ...onfiguring_aws_role_to_access_amazon_s3.md | 163 ++ ...s_loading_data_creating_access_keys_aws.md | 107 ++ .../guides_loading_data_loading_data.md | 48 + 
.../guides_loading_data_loading_data_sql.md | 497 ++++++ ...guides_loading_data_loading_data_wizard.md | 185 +++ ...ading_data_working_with_external_tables.md | 87 + ...ith_semi_structured_data_load_json_data.md | 193 +++ ..._semi_structured_data_load_parquet_data.md | 179 +++ ...emi_structured_data_working_with_arrays.md | 387 +++++ ..._data_working_with_semi_structured_data.md | 21 + .../guides_managing_your_organization.md | 20 + ...ides_managing_your_organization_billing.md | 33 + ...r_organization_creating_an_organization.md | 36 + ...ing_your_organization_managing_accounts.md | 89 + ...aging_your_organization_managing_logins.md | 80 + ...naging_your_organization_managing_users.md | 355 ++++ ...ging_your_organization_service_accounts.md | 219 +++ .../guides_operate_engines_operate_engines.md | 10 + ...guides_operate_engines_rbac_for_engines.md | 59 + .../guides_operate_engines_sizing_engines.md | 45 + .../guides_operate_engines_system_engine.md | 111 ++ ..._engines_working_with_engines_using_ddl.md | 204 +++ .../fireboltdocs/guides_query_data.md | 10 + .../guides_query_data_using_the_api.md | 121 ++ ..._query_data_using_the_develop_workspace.md | 157 ++ .../fireboltdocs/guides_security.md | 12 + .../guides_security_enabling_mfa.md | 36 + .../guides_security_network_policies.md | 101 ++ .../fireboltdocs/guides_security_ownership.md | 77 + .../guides_security_privatelink.md | 117 ++ .../fireboltdocs/guides_security_rbac.md | 194 +++ .../fireboltdocs/guides_security_sso.md | 28 + .../fireboltdocs/guides_security_sso_auth0.md | 99 ++ .../guides_security_sso_custom_sso.md | 161 ++ .../fireboltdocs/guides_security_sso_okta.md | 95 ++ .../guides_security_sso_onelogin.md | 92 ++ .../guides_security_sso_pingfederate.md | 74 + .../guides_security_sso_salesforce.md | 83 + cmd/docs-scrapper/fireboltdocs/index.md | 47 + cmd/docs-scrapper/fireboltdocs/overview.md | 14 + .../overview_architecture_overview.md | 39 + .../fireboltdocs/overview_billing.md | 164 ++ 
.../overview_billing_compute_cost.md | 144 ++ .../overview_billing_storage_cost.md | 164 ++ .../fireboltdocs/overview_data_management.md | 161 ++ .../overview_engine_consumption.md | 86 + .../overview_engine_fundamentals.md | 164 ++ .../overview_indexes_aggregating_index.md | 75 + .../overview_indexes_primary_index.md | 122 ++ .../overview_indexes_using_indexes.md | 446 ++++++ .../overview_organizations_accounts.md | 228 +++ .../fireboltdocs/overview_queries.md | 25 + ...ueries_understand_query_performance_hbs.md | 58 + ..._understand_query_performance_subresult.md | 81 + .../overview_queries_understand_spilling.md | 47 + ..._security_role_based%20access%20control.md | 60 + ...%20access%20control_account_permissions.md | 105 ++ ...sed%20access%20control_check_privileges.md | 81 + ...20access%20control_database_permissions.md | 59 + ...database_permissions_schema_permissions.md | 99 ++ ..._database_permissions_table_permissions.md | 101 ++ ...l_database_permissions_view_permissions.md | 45 + ...d%20access%20control_engine_permissions.md | 54 + ...role_based%20access%20control_ownership.md | 62 + ...ased%20access%20control_role_management.md | 10 + ...%20control_role_management_custom_roles.md | 7 + ...%20control_role_management_system_roles.md | 15 + ...sed%20access%20control_role_permissions.md | 17 + ...sed%20access%20control_user_permissions.md | 19 + .../overview_security_security.md | 197 +++ cmd/docs-scrapper/fireboltdocs/product.md | 87 + .../fireboltdocs/product_product_platform.md | 33 + cmd/docs-scrapper/fireboltdocs/reference.md | 12 + .../reference_available_regions.md | 7 + .../fireboltdocs/reference_help_menu.md | 108 ++ .../reference_interval_arithmetic.md | 96 ++ .../reference_object_identifiers.md | 55 + .../reference_proof_of_concept_guide.md | 60 + .../reference_release_notes_release_notes.md | 39 + ...nce_release_notes_release_notes_archive.md | 1425 +++++++++++++++++ .../fireboltdocs/reference_reserved_words.md | 117 ++ 
.../fireboltdocs/reference_system_settings.md | 218 +++ .../fireboltdocs/sql_reference.md | 12 + .../sql_reference_bytea_data_type.md | 130 ++ .../fireboltdocs/sql_reference_commands.md | 91 ++ .../sql_reference_commands_access_control.md | 21 + ...nce_commands_access_control_alter_login.md | 44 + ...nds_access_control_alter_network_policy.md | 52 + ...ence_commands_access_control_alter_role.md | 17 + ...ds_access_control_alter_service_account.md | 49 + ...ence_commands_access_control_alter_user.md | 59 + ...ce_commands_access_control_create_login.md | 29 + ...ds_access_control_create_network_policy.md | 26 + ...nce_commands_access_control_create_role.md | 33 + ...s_access_control_create_service_account.md | 38 + ...nce_commands_access_control_create_user.md | 29 + ...ence_commands_access_control_drop_login.md | 25 + ...ands_access_control_drop_network_policy.md | 25 + ...rence_commands_access_control_drop_role.md | 35 + ...nds_access_control_drop_service_account.md | 25 + ...rence_commands_access_control_drop_user.md | 27 + ...reference_commands_access_control_grant.md | 103 ++ ...eference_commands_access_control_revoke.md | 96 ++ .../sql_reference_commands_data_definition.md | 26 + ..._commands_data_definition_alter_account.md | 23 + ...commands_data_definition_alter_database.md | 40 + ...ands_data_definition_alter_organization.md | 42 + ...e_commands_data_definition_alter_schema.md | 20 + ...ce_commands_data_definition_alter_table.md | 87 + ...nce_commands_data_definition_alter_view.md | 19 + ...commands_data_definition_create_account.md | 26 + ...ata_definition_create_aggregating_index.md | 186 +++ ...ommands_data_definition_create_database.md | 27 + ...s_data_definition_create_external_table.md | 327 ++++ ..._definition_create_fact_dimension_table.md | 129 ++ ...n_create_fact_dimension_table_as_select.md | 32 + ...ommands_data_definition_create_location.md | 225 +++ ...ands_data_definition_create_table_clone.md | 102 ++ 
...ce_commands_data_definition_create_view.md | 38 + ...e_commands_data_definition_drop_account.md | 28 + ..._commands_data_definition_drop_database.md | 15 + ...nce_commands_data_definition_drop_index.md | 13 + ..._commands_data_definition_drop_location.md | 97 ++ ...nce_commands_data_definition_drop_table.md | 13 + ...ence_commands_data_definition_drop_view.md | 13 + ...e_commands_data_definition_use_database.md | 46 + .../sql_reference_commands_data_management.md | 11 + ...ence_commands_data_management_copy_from.md | 530 ++++++ ...erence_commands_data_management_copy_to.md | 240 +++ ...ference_commands_data_management_delete.md | 57 + ...ference_commands_data_management_insert.md | 45 + ...commands_data_management_truncate_table.md | 44 + ...ference_commands_data_management_update.md | 73 + ...ference_commands_data_management_vacuum.md | 111 ++ .../sql_reference_commands_engines.md | 14 + ...reference_commands_engines_alter_engine.md | 106 ++ ...eference_commands_engines_create_engine.md | 107 ++ ..._reference_commands_engines_drop_engine.md | 21 + ...reference_commands_engines_start_engine.md | 21 + ..._reference_commands_engines_stop_engine.md | 40 + ...l_reference_commands_engines_use_engine.md | 33 + .../sql_reference_commands_metadata.md | 12 + ...ql_reference_commands_metadata_describe.md | 25 + ..._reference_commands_metadata_show_cache.md | 23 + ...ference_commands_metadata_show_catalogs.md | 25 + ...eference_commands_metadata_show_columns.md | 23 + ...eference_commands_metadata_show_engines.md | 17 + ...eference_commands_metadata_show_indexes.md | 25 + ...reference_commands_metadata_show_tables.md | 25 + ..._reference_commands_metadata_show_views.md | 17 + .../sql_reference_commands_queries.md | 9 + .../sql_reference_commands_queries_cancel.md | 45 + .../sql_reference_commands_queries_explain.md | 298 ++++ .../sql_reference_commands_queries_pipe.md | 240 +++ ...eference_commands_queries_recommend_ddl.md | 66 + 
.../sql_reference_commands_queries_select.md | 623 +++++++ .../fireboltdocs/sql_reference_data_types.md | 167 ++ .../sql_reference_date_data_type.md | 95 ++ ...ference_functions_reference_aggregation.md | 29 + ...nctions_reference_aggregation_any_value.md | 47 + ...rence_aggregation_approx_count_distinct.md | 77 + ...nctions_reference_aggregation_array_agg.md | 53 + ...nce_functions_reference_aggregation_avg.md | 46 + ...functions_reference_aggregation_bit_and.md | 64 + ..._functions_reference_aggregation_bit_or.md | 65 + ...functions_reference_aggregation_bit_xor.md | 102 ++ ...unctions_reference_aggregation_bool_and.md | 34 + ...functions_reference_aggregation_bool_or.md | 34 + ...e_functions_reference_aggregation_count.md | 77 + ...unctions_reference_aggregation_hash_agg.md | 59 + ...s_reference_aggregation_hll_count_build.md | 68 + ...eference_aggregation_hll_count_distinct.md | 53 + ...s_reference_aggregation_hll_count_merge.md | 30 + ...nce_functions_reference_aggregation_max.md | 53 + ..._functions_reference_aggregation_max_by.md | 68 + ..._functions_reference_aggregation_median.md | 50 + ...nce_functions_reference_aggregation_min.md | 51 + ..._functions_reference_aggregation_min_by.md | 68 + ...s_reference_aggregation_percentile_cont.md | 91 ++ ...ctions_reference_aggregation_stddev_pop.md | 41 + ...tions_reference_aggregation_stddev_samp.md | 43 + ...nce_functions_reference_aggregation_sum.md | 59 + ...ions_reference_aggregation_variance_pop.md | 41 + ...ons_reference_aggregation_variance_samp.md | 43 + ...sql_reference_functions_reference_array.md | 19 + ..._functions_reference_array_array_concat.md | 62 + ...unctions_reference_array_array_contains.md | 42 + ...e_functions_reference_array_array_count.md | 37 + ...ns_reference_array_array_count_distinct.md | 25 + ...unctions_reference_array_array_distinct.md | 28 + ..._functions_reference_array_array_length.md | 35 + ...nce_functions_reference_array_array_max.md | 28 + 
...nce_functions_reference_array_array_min.md | 28 + ...functions_reference_array_array_reverse.md | 37 + ...ions_reference_array_array_reverse_sort.md | 37 + ...ce_functions_reference_array_array_sort.md | 37 + ...nce_functions_reference_array_array_sum.md | 26 + ...nctions_reference_array_array_to_string.md | 48 + ...rence_functions_reference_array_flatten.md | 33 + ...ence_functions_reference_array_index_of.md | 28 + ...sql_reference_functions_reference_bytea.md | 7 + ..._functions_reference_bytea_convert_from.md | 77 + ...erence_functions_reference_bytea_decode.md | 48 + ...erence_functions_reference_bytea_encode.md | 48 + ...reference_conditional_and_miscellaneous.md | 17 + ...ence_conditional_and_miscellaneous_case.md | 51 + ...ence_conditional_and_miscellaneous_cast.md | 31 + ...conditional_and_miscellaneous_city_hash.md | 31 + ..._conditional_and_miscellaneous_coalesce.md | 27 + ..._conditional_and_miscellaneous_greatest.md | 25 + ...ence_conditional_and_miscellaneous_hash.md | 31 + ...erence_conditional_and_miscellaneous_if.md | 34 + ...ce_conditional_and_miscellaneous_ifnull.md | 32 + ...nce_conditional_and_miscellaneous_least.md | 25 + ...ce_conditional_and_miscellaneous_nullif.md | 45 + ..._conditional_and_miscellaneous_try_cast.md | 29 + ...ce_conditional_and_miscellaneous_typeof.md | 27 + ...e_conditional_and_miscellaneous_version.md | 18 + ...erence_functions_reference_datasketches.md | 13 + ...asketches_apache_datasketches_hll_build.md | 99 ++ ...etches_apache_datasketches_hll_estimate.md | 43 + ...asketches_apache_datasketches_hll_merge.md | 38 + ...rence_functions_reference_date_and_time.md | 17 + ...ns_reference_date_and_time_current_date.md | 35 + ...rence_date_and_time_current_timestamptz.md | 34 + ..._functions_reference_date_and_time_date.md | 38 + ...ctions_reference_date_and_time_date_add.md | 19 + ...tions_reference_date_and_time_date_diff.md | 27 + ...ions_reference_date_and_time_date_trunc.md | 47 + 
...nctions_reference_date_and_time_extract.md | 40 + ..._reference_date_and_time_localtimestamp.md | 32 + ...nctions_reference_date_and_time_to_char.md | 39 + ...nctions_reference_date_and_time_to_date.md | 39 + ...ns_reference_date_and_time_to_timestamp.md | 70 + ...tions_reference_date_and_time_to_yyyymm.md | 32 + ...ons_reference_date_and_time_to_yyyymmdd.md | 32 + ..._functions_reference_functions_glossary.md | 5 + ...functions_reference_functions_reference.md | 32 + ...eference_functions_reference_geospatial.md | 19 + ...ctions_reference_geospatial_st_asbinary.md | 29 + ...unctions_reference_geospatial_st_asewkb.md | 29 + ...tions_reference_geospatial_st_asgeojson.md | 35 + ...unctions_reference_geospatial_st_astext.md | 29 + ...ctions_reference_geospatial_st_contains.md | 78 + ...unctions_reference_geospatial_st_covers.md | 66 + ...ctions_reference_geospatial_st_distance.md | 34 + ...reference_geospatial_st_geogfromgeojson.md | 35 + ...ns_reference_geospatial_st_geogfromtext.md | 29 + ...ons_reference_geospatial_st_geogfromwkb.md | 29 + ...tions_reference_geospatial_st_geogpoint.md | 29 + ...ions_reference_geospatial_st_intersects.md | 36 + ...ference_geospatial_st_s2cellidfrompoint.md | 43 + ...nce_functions_reference_geospatial_st_x.md | 29 + ...nce_functions_reference_geospatial_st_y.md | 29 + .../sql_reference_functions_reference_json.md | 117 ++ ...e_functions_reference_json_json_extract.md | 92 ++ ...tions_reference_json_json_extract_array.md | 101 ++ ...eference_json_json_pointer_extract_keys.md | 66 + ...eference_json_json_pointer_extract_text.md | 80 + ...erence_json_json_pointer_extract_values.md | 66 + ...nce_functions_reference_json_json_value.md | 63 + ...nctions_reference_json_json_value_array.md | 43 + ...ql_reference_functions_reference_lambda.md | 12 + ...ctions_reference_lambda_array_all_match.md | 53 + ...ctions_reference_lambda_array_any_match.md | 59 + ..._functions_reference_lambda_array_first.md | 28 + 
...ce_functions_reference_lambda_array_sum.md | 28 + ...rence_functions_reference_lambda_filter.md | 57 + ...ce_functions_reference_lambda_transform.md | 85 + ...l_reference_functions_reference_numeric.md | 32 + ...ference_functions_reference_numeric_abs.md | 21 + ...erence_functions_reference_numeric_acos.md | 21 + ...tions_reference_numeric_array_enumerate.md | 31 + ...erence_functions_reference_numeric_asin.md | 21 + ...erence_functions_reference_numeric_atan.md | 21 + ...rence_functions_reference_numeric_atan2.md | 21 + ...ctions_reference_numeric_bit_shift_left.md | 31 + ...tions_reference_numeric_bit_shift_right.md | 31 + ...erence_functions_reference_numeric_ceil.md | 51 + ...ference_functions_reference_numeric_cos.md | 23 + ...ference_functions_reference_numeric_cot.md | 21 + ...nce_functions_reference_numeric_degrees.md | 21 + ...rence_functions_reference_numeric_floor.md | 42 + ...ns_reference_numeric_hll_count_estimate.md | 36 + ...e_functions_reference_numeric_is_finite.md | 23 + ...functions_reference_numeric_is_infinite.md | 23 + ...eference_functions_reference_numeric_ln.md | 25 + ...ference_functions_reference_numeric_log.md | 29 + ...ference_functions_reference_numeric_mod.md | 21 + ...rence_functions_reference_numeric_param.md | 54 + ...eference_functions_reference_numeric_pi.md | 17 + ...ference_functions_reference_numeric_pow.md | 23 + ...nce_functions_reference_numeric_radians.md | 21 + ...ence_functions_reference_numeric_random.md | 19 + ...rence_functions_reference_numeric_round.md | 23 + ...ference_functions_reference_numeric_sin.md | 23 + ...erence_functions_reference_numeric_sqrt.md | 27 + ...ference_functions_reference_numeric_tan.md | 23 + ...l_reference_functions_reference_session.md | 8 + ...tions_reference_session_current_account.md | 15 + ...ions_reference_session_current_database.md | 15 + ...ctions_reference_session_current_engine.md | 15 + ...unctions_reference_session_session_user.md | 33 + 
...ql_reference_functions_reference_string.md | 31 + ...erence_functions_reference_string_btrim.md | 43 + ...rence_functions_reference_string_concat.md | 50 + ...s_reference_string_gen_random_uuid_text.md | 23 + ...erence_functions_reference_string_ilike.md | 49 + ...rence_functions_reference_string_length.md | 31 + ...ference_functions_reference_string_like.md | 83 + ...erence_functions_reference_string_lower.md | 36 + ...ference_functions_reference_string_lpad.md | 34 + ...erence_functions_reference_string_ltrim.md | 58 + ...functions_reference_string_octet_length.md | 29 + ...nce_functions_reference_string_position.md | 40 + ...nctions_reference_string_regexp_extract.md | 48 + ...ons_reference_string_regexp_extract_all.md | 44 + ..._functions_reference_string_regexp_like.md | 48 + ...ctions_reference_string_regexp_like_any.md | 53 + ...nctions_reference_string_regexp_replace.md | 99 ++ ...ence_functions_reference_string_replace.md | 42 + ...ference_functions_reference_string_rpad.md | 26 + ...erence_functions_reference_string_rtrim.md | 58 + ...e_functions_reference_string_split_part.md | 75 + ...ctions_reference_string_string_to_array.md | 58 + ...rence_functions_reference_string_strpos.md | 40 + ...ce_functions_reference_string_substring.md | 61 + ...ference_functions_reference_string_trim.md | 77 + ...erence_functions_reference_string_upper.md | 36 + ...e_functions_reference_string_url_decode.md | 31 + ...e_functions_reference_string_url_encode.md | 37 + ...erence_functions_reference_table_valued.md | 18 + ..._reference_table_valued_generate_series.md | 29 + ...ons_reference_table_valued_list_objects.md | 122 ++ ...nctions_reference_table_valued_read_csv.md | 110 ++ ...ons_reference_table_valued_read_parquet.md | 87 + ...ql_reference_functions_reference_vector.md | 12 + ...e_functions_reference_vector_vector_add.md | 35 + ...reference_vector_vector_cosine_distance.md | 35 + ...ference_vector_vector_cosine_similarity.md | 59 + 
...erence_vector_vector_euclidean_distance.md | 35 + ...s_reference_vector_vector_inner_product.md | 61 + ...erence_vector_vector_manhattan_distance.md | 35 + ...ector_vector_squared_euclidean_distance.md | 59 + ...ctions_reference_vector_vector_subtract.md | 35 + ...ql_reference_functions_reference_window.md | 117 ++ ...e_functions_reference_window_avg_window.md | 38 + ...functions_reference_window_count_window.md | 35 + ...e_functions_reference_window_dense_rank.md | 37 + ..._functions_reference_window_first_value.md | 53 + ...eference_functions_reference_window_lag.md | 34 + ...ference_functions_reference_window_lead.md | 34 + ...e_functions_reference_window_max_window.md | 37 + ...e_functions_reference_window_min_window.md | 33 + ...erence_functions_reference_window_ntile.md | 42 + ...functions_reference_window_percent_rank.md | 36 + ...ference_functions_reference_window_rank.md | 37 + ...e_functions_reference_window_row_number.md | 36 + ...e_functions_reference_window_sum_window.md | 39 + .../sql_reference_geography_data_type.md | 173 ++ .../sql_reference_information_schema.md | 41 + ...l_reference_information_schema_accounts.md | 20 + ...nce_information_schema_applicable_roles.md | 22 + ...l_reference_information_schema_catalogs.md | 18 + ...ql_reference_information_schema_columns.md | 18 + ...erence_information_schema_enabled_roles.md | 20 + ...rence_information_schema_engine_history.md | 18 + ...ormation_schema_engine_metering_history.md | 18 + ...formation_schema_engine_metrics_history.md | 19 + ...information_schema_engine_query_history.md | 25 + ...formation_schema_engine_running_queries.md | 18 + ...mation_schema_engine_user_query_history.md | 25 + ...ql_reference_information_schema_engines.md | 20 + ...ence_information_schema_engines_billing.md | 16 + ...ql_reference_information_schema_indexes.md | 22 + ..._reference_information_schema_locations.md | 39 + ...sql_reference_information_schema_logins.md | 20 + ...nce_information_schema_network_policies.md 
| 18 + ...ce_information_schema_object_privileges.md | 56 + ...l_reference_information_schema_routines.md | 36 + ...l_reference_information_schema_schemata.md | 18 + ...nce_information_schema_service_accounts.md | 22 + ...ence_information_schema_storage_billing.md | 16 + ...ence_information_schema_storage_history.md | 38 + ...rmation_schema_storage_metering_history.md | 16 + ...sql_reference_information_schema_tables.md | 18 + ...tion_schema_transitive_applicable_roles.md | 24 + .../sql_reference_information_schema_users.md | 20 + .../sql_reference_information_schema_views.md | 18 + .../sql_reference_numeric_data_type.md | 77 + .../fireboltdocs/sql_reference_operators.md | 242 +++ .../sql_reference_timestampntz_data_type.md | 93 ++ .../sql_reference_timestamptz_data_type.md | 160 ++ cmd/docs-scrapper/main.go | 39 + cmd/docs-scrapper/scrapper.go | 97 ++ cmd/firebolt-mcp-server/main.go | 159 ++ go.mod | 48 + go.sum | 310 ++++ pkg/clients/database/connection.go | 149 ++ pkg/clients/database/connection_test.go | 215 +++ .../database/databasemock/connection.go | 57 + pkg/clients/database/databasemock/pool.go | 115 ++ pkg/clients/database/params.go | 67 + pkg/clients/database/params_test.go | 161 ++ pkg/clients/database/pool.go | 132 ++ pkg/clients/database/pool_test.go | 225 +++ pkg/clients/discovery/client.go | 132 ++ pkg/clients/discovery/client_test.go | 284 ++++ pkg/clients/discovery/discoverymock/client.go | 36 + pkg/helpers/args/args.go | 70 + pkg/helpers/args/args_test.go | 89 + pkg/helpers/itertools/map.go | 25 + pkg/helpers/itertools/map_test.go | 47 + pkg/helpers/mimetype/mime_type.go | 6 + pkg/prompts/firebolt.go | 38 + pkg/prompts/firebolt.md | 35 + pkg/resources/accounts.go | 90 ++ pkg/resources/accounts_test.go | 298 ++++ pkg/resources/databases.go | 100 ++ pkg/resources/databases_test.go | 307 ++++ pkg/resources/docs.go | 137 ++ pkg/resources/docs_overview.md | 208 +++ pkg/resources/docs_proof.md | 3 + pkg/resources/docs_reference.md | 6 + 
pkg/resources/docs_test.go | 144 ++ pkg/resources/engines.go | 100 ++ pkg/resources/engines_test.go | 313 ++++ pkg/server/server.go | 211 +++ pkg/server/server_test.go | 130 ++ pkg/tools/connect.go | 188 +++ pkg/tools/connect_test.go | 311 ++++ pkg/tools/docs.go | 110 ++ pkg/tools/docs_test.go | 246 +++ pkg/tools/query.go | 113 ++ pkg/tools/query_test.go | 270 ++++ pkg/version/version.go | 27 + 473 files changed, 34531 insertions(+) create mode 100644 .github/workflows/release.yaml create mode 100644 .gitignore create mode 100644 .goreleaser.yaml create mode 100644 Dockerfile create mode 100644 README.md create mode 100644 Taskfile.yaml create mode 100644 cmd/docs-scrapper/README.md create mode 100644 cmd/docs-scrapper/fireboltdocs/api_reference.md create mode 100644 cmd/docs-scrapper/fireboltdocs/api_reference_using_async_queries.md create mode 100644 cmd/docs-scrapper/fireboltdocs/api_reference_using_sync_queries.md create mode 100644 cmd/docs-scrapper/fireboltdocs/docs.go create mode 100644 cmd/docs-scrapper/fireboltdocs/guides.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_go.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_jdbc.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_net_sdk.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_nodejs.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_python.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_sqlalchemy.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_exporting_data.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_getting_started.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_load_data_wizard.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_next.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_sql.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_airbyte.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_airflow.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_apache_superset.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_paradime.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_preset.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_with_dbt.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_cube_js.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_dbeaver.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_estuary.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_integrations.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_metabase.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_otel_exporter.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_integrations_tableau.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_configuring_aws_role_to_access_amazon_s3.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_creating_access_keys_aws.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_sql.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_wizard.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_external_tables.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_json_data.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_parquet_data.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_arrays.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_semi_structured_data.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_billing.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_creating_an_organization.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_accounts.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_logins.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_users.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_service_accounts.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_operate_engines_operate_engines.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_operate_engines_rbac_for_engines.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_operate_engines_sizing_engines.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_operate_engines_system_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_operate_engines_working_with_engines_using_ddl.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_query_data.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_api.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_develop_workspace.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/guides_security.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_enabling_mfa.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_network_policies.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_ownership.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_privatelink.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_rbac.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_sso.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_sso_auth0.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_sso_custom_sso.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_sso_okta.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_sso_onelogin.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_sso_pingfederate.md create mode 100644 cmd/docs-scrapper/fireboltdocs/guides_security_sso_salesforce.md create mode 100644 cmd/docs-scrapper/fireboltdocs/index.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_architecture_overview.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_billing.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_billing_compute_cost.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_billing_storage_cost.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_data_management.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_engine_consumption.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_engine_fundamentals.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_indexes_aggregating_index.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_indexes_primary_index.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_indexes_using_indexes.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/overview_organizations_accounts.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_queries.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_hbs.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_subresult.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_queries_understand_spilling.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_account_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_check_privileges.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_schema_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_table_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_view_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_engine_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_ownership.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_custom_roles.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_system_roles.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_user_permissions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/overview_security_security.md create mode 100644 cmd/docs-scrapper/fireboltdocs/product.md create mode 100644 cmd/docs-scrapper/fireboltdocs/product_product_platform.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_available_regions.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_help_menu.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_interval_arithmetic.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_object_identifiers.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_proof_of_concept_guide.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes_archive.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_reserved_words.md create mode 100644 cmd/docs-scrapper/fireboltdocs/reference_system_settings.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_bytea_data_type.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_login.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_network_policy.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_role.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_service_account.md 
create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_user.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_login.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_network_policy.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_role.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_service_account.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_user.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_login.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_network_policy.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_role.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_service_account.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_user.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_grant.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_revoke.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_account.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_database.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_organization.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_schema.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_table.md create mode 
100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_view.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_account.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_aggregating_index.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_database.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_external_table.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_fact_dimension_table.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_fact_dimension_table_as_select.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_location.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_table_clone.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_create_view.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_drop_account.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_drop_database.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_drop_index.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_drop_location.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_drop_table.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_drop_view.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_use_database.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management_copy_from.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management_copy_to.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management_delete.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management_insert.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management_truncate_table.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management_update.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_management_vacuum.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_engines.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_engines_alter_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_engines_create_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_engines_drop_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_engines_start_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_engines_stop_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_engines_use_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_describe.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_show_cache.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_show_catalogs.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_show_columns.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_show_engines.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_show_indexes.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_show_tables.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_metadata_show_views.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_queries.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_queries_cancel.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_queries_explain.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_queries_pipe.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_queries_recommend_ddl.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_commands_queries_select.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_data_types.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_date_data_type.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_any_value.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_approx_count_distinct.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_array_agg.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_avg.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_bit_and.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_bit_or.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_bit_xor.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_bool_and.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_bool_or.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_count.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_hash_agg.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_hll_count_build.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_hll_count_distinct.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_hll_count_merge.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_max.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_max_by.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_median.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_min.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_min_by.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_percentile_cont.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_stddev_pop.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_stddev_samp.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_sum.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_variance_pop.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_aggregation_variance_samp.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_concat.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_contains.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_count.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_count_distinct.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_distinct.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_length.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_max.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_min.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_reverse.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_reverse_sort.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_sort.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_sum.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_array_to_string.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_flatten.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_array_index_of.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_bytea.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_bytea_convert_from.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_bytea_decode.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_bytea_encode.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_case.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_cast.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_city_hash.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_coalesce.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_greatest.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_hash.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_if.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_ifnull.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_least.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_nullif.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_try_cast.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_typeof.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_conditional_and_miscellaneous_version.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_datasketches.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_datasketches_apache_datasketches_hll_build.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_datasketches_apache_datasketches_hll_estimate.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_datasketches_apache_datasketches_hll_merge.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_current_date.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_current_timestamptz.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_date.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_date_add.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_date_diff.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_date_trunc.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_extract.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_localtimestamp.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_to_char.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_to_date.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_to_timestamp.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_to_yyyymm.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_date_and_time_to_yyyymmdd.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_functions_glossary.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_functions_reference.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_asbinary.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_asewkb.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_asgeojson.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_astext.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_contains.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_covers.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_distance.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_geogfromgeojson.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_geogfromtext.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_geogfromwkb.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_geogpoint.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_intersects.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_s2cellidfrompoint.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_x.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_geospatial_st_y.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json_json_extract.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json_json_extract_array.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json_json_pointer_extract_keys.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json_json_pointer_extract_text.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json_json_pointer_extract_values.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json_json_value.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_json_json_value_array.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_lambda.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_lambda_array_all_match.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_lambda_array_any_match.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_lambda_array_first.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_lambda_array_sum.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_lambda_filter.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_lambda_transform.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_abs.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_acos.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_array_enumerate.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_asin.md 
create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_atan.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_atan2.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_bit_shift_left.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_bit_shift_right.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_ceil.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_cos.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_cot.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_degrees.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_floor.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_hll_count_estimate.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_is_finite.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_is_infinite.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_ln.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_log.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_mod.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_param.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_pi.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_pow.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_radians.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_random.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_round.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_sin.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_sqrt.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_numeric_tan.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_session.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_session_current_account.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_session_current_database.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_session_current_engine.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_session_session_user.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_btrim.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_concat.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_gen_random_uuid_text.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_ilike.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_length.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_like.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_lower.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_lpad.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_ltrim.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_octet_length.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_position.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_regexp_extract.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_regexp_extract_all.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_regexp_like.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_regexp_like_any.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_regexp_replace.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_replace.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_rpad.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_rtrim.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_split_part.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_string_to_array.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_strpos.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_substring.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_trim.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_upper.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_url_decode.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_string_url_encode.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_table_valued.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_table_valued_generate_series.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_table_valued_list_objects.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_table_valued_read_csv.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_table_valued_read_parquet.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_add.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_cosine_distance.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_cosine_similarity.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_euclidean_distance.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_inner_product.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_manhattan_distance.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_squared_euclidean_distance.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_vector_vector_subtract.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_avg_window.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_count_window.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_dense_rank.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_first_value.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_lag.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_lead.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_max_window.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_min_window.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_ntile.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_percent_rank.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_rank.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_row_number.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_functions_reference_window_sum_window.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_geography_data_type.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_accounts.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_applicable_roles.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_catalogs.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_columns.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_enabled_roles.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engine_history.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engine_metering_history.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engine_metrics_history.md create 
mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engine_query_history.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engine_running_queries.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engine_user_query_history.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engines.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_engines_billing.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_indexes.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_locations.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_logins.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_network_policies.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_object_privileges.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_routines.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_schemata.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_service_accounts.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_storage_billing.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_storage_history.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_storage_metering_history.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_tables.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_transitive_applicable_roles.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_users.md create mode 100644 
cmd/docs-scrapper/fireboltdocs/sql_reference_information_schema_views.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_numeric_data_type.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_operators.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_timestampntz_data_type.md create mode 100644 cmd/docs-scrapper/fireboltdocs/sql_reference_timestamptz_data_type.md create mode 100644 cmd/docs-scrapper/main.go create mode 100644 cmd/docs-scrapper/scrapper.go create mode 100644 cmd/firebolt-mcp-server/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 pkg/clients/database/connection.go create mode 100644 pkg/clients/database/connection_test.go create mode 100644 pkg/clients/database/databasemock/connection.go create mode 100644 pkg/clients/database/databasemock/pool.go create mode 100644 pkg/clients/database/params.go create mode 100644 pkg/clients/database/params_test.go create mode 100644 pkg/clients/database/pool.go create mode 100644 pkg/clients/database/pool_test.go create mode 100644 pkg/clients/discovery/client.go create mode 100644 pkg/clients/discovery/client_test.go create mode 100644 pkg/clients/discovery/discoverymock/client.go create mode 100644 pkg/helpers/args/args.go create mode 100644 pkg/helpers/args/args_test.go create mode 100644 pkg/helpers/itertools/map.go create mode 100644 pkg/helpers/itertools/map_test.go create mode 100644 pkg/helpers/mimetype/mime_type.go create mode 100644 pkg/prompts/firebolt.go create mode 100644 pkg/prompts/firebolt.md create mode 100644 pkg/resources/accounts.go create mode 100644 pkg/resources/accounts_test.go create mode 100644 pkg/resources/databases.go create mode 100644 pkg/resources/databases_test.go create mode 100644 pkg/resources/docs.go create mode 100644 pkg/resources/docs_overview.md create mode 100644 pkg/resources/docs_proof.md create mode 100644 pkg/resources/docs_reference.md create mode 100644 pkg/resources/docs_test.go create 
mode 100644 pkg/resources/engines.go create mode 100644 pkg/resources/engines_test.go create mode 100644 pkg/server/server.go create mode 100644 pkg/server/server_test.go create mode 100644 pkg/tools/connect.go create mode 100644 pkg/tools/connect_test.go create mode 100644 pkg/tools/docs.go create mode 100644 pkg/tools/docs_test.go create mode 100644 pkg/tools/query.go create mode 100644 pkg/tools/query_test.go create mode 100644 pkg/version/version.go diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml new file mode 100644 index 0000000..530c603 --- /dev/null +++ b/.github/workflows/release.yaml @@ -0,0 +1,43 @@ +name: release + +on: + pull_request: + push: + # run only against tags + tags: + - "*" + +permissions: + contents: write + packages: write + id-token: write + +jobs: + + goreleaser: + runs-on: ubuntu-latest + steps: + + - name: Checkout + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version-file: go.mod + + - name: Run GoReleaser + uses: goreleaser/goreleaser-action@v6 + with: + version: "~> v2" + args: release --clean + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Upload assets + uses: actions/upload-artifact@v4 + with: + name: firebolt-mcp-server + path: dist/* \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5444d11 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea/ +dist/ diff --git a/.goreleaser.yaml b/.goreleaser.yaml new file mode 100644 index 0000000..a5938bf --- /dev/null +++ b/.goreleaser.yaml @@ -0,0 +1,82 @@ +version: 2 +project_name: firebolt-mcp-server + +before: + hooks: + - go mod tidy + +builds: + - main: ./cmd/firebolt-mcp-server + mod_timestamp: "{{ .CommitTimestamp }}" + buildmode: pie + env: + - CGO_ENABLED=0 + goos: + - linux + - darwin + - windows + goarch: + - amd64 + - arm64 + +archives: + - formats: [tar.gz] + # This name template makes the OS and Arch + # 
compatible with the results of `uname`. + name_template: >- + {{ .ProjectName }}- + {{- title .Os }}- + {{- if eq .Arch "amd64" }}x86_64 + {{- else if eq .Arch "386" }}i386 + {{- else }}{{ .Arch }}{{ end }} + {{- if .Arm }}v{{ .Arm }}{{ end }} + # Use zip for windows archives + format_overrides: + - goos: windows + formats: [zip] + +dockers: + - use: buildx + image_templates: + - "ghcr.io/firebolt-db/mcp-server:{{ .Version }}-amd64" + skip_push: true + build_flag_templates: + - --platform=linux/amd64 + - --label=org.opencontainers.image.title={{ .ProjectName }} + - --label=org.opencontainers.image.description={{ .ProjectName }} + - --label=org.opencontainers.image.url=https://github.com/firebolt-db/mcp-server + - --label=org.opencontainers.image.source=https://github.com/firebolt-db/mcp-server + - --label=org.opencontainers.image.version={{ .Version }} + - --label=org.opencontainers.image.created={{ time "2006-01-02T15:04:05Z07:00" }} + - --label=org.opencontainers.image.revision={{ .FullCommit }} + - --label=org.opencontainers.image.licenses=MIT + - use: buildx + image_templates: + - "ghcr.io/firebolt-db/mcp-server:{{ .Version }}-arm64v8" + goarch: arm64 + skip_push: true + build_flag_templates: + - --platform=linux/arm64/v8 + - --label=org.opencontainers.image.title={{ .ProjectName }} + - --label=org.opencontainers.image.description={{ .ProjectName }} + - --label=org.opencontainers.image.url=https://github.com/firebolt-db/mcp-server + - --label=org.opencontainers.image.source=https://github.com/firebolt-db/mcp-server + - --label=org.opencontainers.image.version={{ .Version }} + - --label=org.opencontainers.image.created={{ time "2006-01-02T15:04:05Z07:00" }} + - --label=org.opencontainers.image.revision={{ .FullCommit }} + - --label=org.opencontainers.image.licenses=MIT + +docker_manifests: + - name_template: ghcr.io/firebolt-db/mcp-server:latest + image_templates: + - ghcr.io/firebolt-db/mcp-server:{{ .Version }}-amd64 + - ghcr.io/firebolt-db/mcp-server:{{ 
.Version }}-arm64v8 + - name_template: ghcr.io/firebolt-db/mcp-server:{{ .Version }} + image_templates: + - ghcr.io/firebolt-db/mcp-server:{{ .Version }}-amd64 + - ghcr.io/firebolt-db/mcp-server:{{ .Version }}-arm64v8 + +release: {} + +npm: + - name: "@firebolt-db/mcp-server" diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..27bd63e --- /dev/null +++ b/Dockerfile @@ -0,0 +1,5 @@ +# syntax=docker/dockerfile:1 + +FROM gcr.io/distroless/base:debug +COPY ./firebolt-mcp-server /usr/local/bin/firebolt-mcp-server +ENTRYPOINT ["/usr/local/bin/firebolt-mcp-server"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..0e9eb85 --- /dev/null +++ b/README.md @@ -0,0 +1,184 @@ +

+
+ Firebolt +
+ Firebolt MCP Server +
+

+ +

+A Model Context Protocol implementation that connects your LLM to Firebolt's cloud data warehouse +

+ +

+ + Release + + + License + + + Go Version + + + Build Status + +

+ +

+ Key Features | + How To Use | + Requirements | + Architecture | + Development | + License +

+ +![screenshot](https://img.example.firebolt.io/mcp-server-demo.gif) + +## Key Features + +* **LLM Integration with Firebolt** - Connect your AI assistants directly to your data warehouse + - Enable AI agents to autonomously query your data and build analytics solutions + - Provide LLMs with specialized knowledge of Firebolt's capabilities and features + +* **SQL Query Execution** + - Direct query execution against Firebolt databases + - Support for multiple query types and execution modes + +* **Documentation Access** + - Comprehensive Firebolt documentation available to the LLM + - SQL reference, function reference, and more + +* **Account Management** + - Connect to different accounts and engines + - Manage authentication seamlessly + +* **Multi-platform Support** + - Run on any platform supporting Go binaries + - Docker container support for easy deployment + +## How To Use + +To get started with the Firebolt MCP Server, you'll need a Firebolt service account. If you don't have a Firebolt account yet, [sign up here](https://www.firebolt.io/signup). + +### Option 1: Use the Docker image + +```bash +# Run with Docker +docker run -p 8080:8080 \ + -e FIREBOLT_MCP_CLIENT_ID=your-client-id \ + -e FIREBOLT_MCP_CLIENT_SECRET=your-client-secret \ + -e FIREBOLT_MCP_TRANSPORT=sse \ + firebolt/mcp-server:latest +``` + +### Option 2: Download and run the binary + +```bash +# Download the latest release for your platform from: +# https://github.com/firebolt-db/mcp-server/releases + +# Run the server +./firebolt-mcp-server \ + --client-id your-client-id \ + --client-secret your-client-secret \ + --transport sse +``` + +### Connecting your LLM + +Once the server is running, you can connect to it using any MCP-compatible client. 
For example: + +```bash +# Using the OpenAI API with MCP extension +curl -X POST https://api.openai.com/v1/chat/completions \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer $OPENAI_API_KEY" \ + -d '{ + "model": "gpt-4", + "messages": [ + {"role": "system", "content": "You are a data analyst working with Firebolt."}, + {"role": "user", "content": "How many users did we have last month?"} + ], + "tools": [ + { + "type": "mcp", + "mcp": { + "endpoint": "http://localhost:8080", + "auth": { + "type": "bearer", + "token": "YOUR_TOKEN" + } + } + } + ] + }' +``` + +## Requirements + +- Firebolt service account credentials (client ID and client secret) +- For development: Go 1.24.1 or later +- For deployment: Docker (optional) + +## Architecture + +The Firebolt MCP Server implements the [Model Context Protocol](https://github.com/anthropics/anthropic-cookbook/tree/main/model_context_protocol) specification, providing: + +1. **Tools** - Task-specific capabilities provided to the LLM: + - `Connect`: Establish connections to Firebolt engines and databases + - `Docs`: Access Firebolt documentation + - `Query`: Execute SQL queries against Firebolt + +2. **Resources** - Data that can be referenced by the LLM: + - Documentation articles + - Account information + - Database schema + - Engine statistics + +3. **Prompts** - Predefined instructions for the LLM: + - Firebolt Expert: Prompts the model to act as a Firebolt specialist + +## Development + +To set up the development environment: + +```bash +# Clone this repository +git clone https://github.com/firebolt-db/mcp-server.git + +# Go into the repository +cd mcp-server + +# Install Task (if you don't have it already) +go install github.com/go-task/task/v3/cmd/task@latest + +# Update Go dependencies +task mod + +# Build the application +task build +``` + +### Running tests + +```bash +go test ./... +``` + +### Building Docker image + +```bash +docker build -t firebolt-mcp-server . 
+``` + +## License + +MIT + +--- + +> [firebolt.io](https://www.firebolt.io)  ·  +> GitHub [@firebolt-db](https://github.com/firebolt-db)  ·  +> Twitter [@FireboltDB](https://twitter.com/FireboltDB) diff --git a/Taskfile.yaml b/Taskfile.yaml new file mode 100644 index 0000000..097af17 --- /dev/null +++ b/Taskfile.yaml @@ -0,0 +1,37 @@ +version: 3 + +tasks: + + help: + desc: Display this help screen + silent: true + cmds: + - task --list + + mod: + desc: tidy Go modules, download dependencies + silent: true + cmd: | + go mod tidy + go mod download + + build: + desc: Build application binary + silent: true + deps: + - task: goreleaser + vars: + CLI_ARGS: build --clean --snapshot --single-target + + goreleaser: + desc: Build application binary + silent: true + cmd: | + docker run --rm --privileged \ + -v $PWD:/src \ + -v /var/run/docker.sock:/var/run/docker.sock \ + -w /src \ + -e GOOS={{OS}} \ + -e GOARCH={{ARCH}} \ + goreleaser/goreleaser:v2.8.2 \ + {{.CLI_ARGS}} diff --git a/cmd/docs-scrapper/README.md b/cmd/docs-scrapper/README.md new file mode 100644 index 0000000..fa4582a --- /dev/null +++ b/cmd/docs-scrapper/README.md @@ -0,0 +1,4 @@ +# Firebolt Documentation Scrapper + +This tools scrapes Firebolt documentation website and puts content to a local directory. +This is a temporary solution until we rework our documentation to be LLM-friendly. diff --git a/cmd/docs-scrapper/fireboltdocs/api_reference.md b/cmd/docs-scrapper/fireboltdocs/api_reference.md new file mode 100644 index 0000000..92af181 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/api_reference.md @@ -0,0 +1,90 @@ +# [](#api-reference)API reference + +The Firebolt API enables programmatic interaction with Firebolt databases for running SQL statements, retrieving data, and managing engines. Use API calls to submit queries, retrieve results, and perform administrative tasks without the user interface (UI). + +Firebolt offers official SDKs and drivers to simplify API usage. 
These drivers interface between your application and Firebolt, handling authentication, SQL statement submission, and result processing. + +![Use a service account and a driver to connect to the Firebolt API which returns a result.](../assets/images/API-workflow.png) + +To submit an API request, set up a Firebolt driver and use it to send a query to Firebolt, as explained in the following sections. + +**Topics:** + +- [Prerequisites](#prerequisites) – Set up your account and credentials before submitting an API request. +- [Set up a driver](#set-up-a-driver) – Download, install, and configure a Firebolt driver to send queries using the Firebolt API. +- [Submit a query](#submit-a-query) – Use a driver to connect to Firebolt and submit a query. + +## [](#prerequisites)Prerequisites + +Before you submit API queries, you need the following: + +1. **A Firebolt account** – Ensure that you have access to an active Firebolt account. If you don’t have access, you can [sign up for an account](https://www.firebolt.io/sign-up). For more information about how to register with Firebolt, see [Get started with Firebolt](/Guides/getting-started/). +2. **A Firebolt service account** – You must have access to an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html), which facilitates programmatic access to Firebolt. +3. **A user associated with the Firebolt service account** – You must associate a [user](/Guides/managing-your-organization/managing-users.html#-users) with your service account, and the user must have the necessary permissions to run the query on the specified database using the specified engine. +4. **Sufficient permissions** If you want to query user data through a specific engine, you must have sufficient permissions on the engine, as well as on any tables and databases you access. + +## [](#set-up-a-driver)Set up a driver + +Drivers are software components that facilitate communication between applications and databases. 
Use a Firebolt driver to connect to a Firebolt database, authenticate securely, and run SQL statements with minimal setup. + +Use a Firebolt driver for the following: + +- **Simplified API access** – Manage authentication and request formatting, eliminating the need for manual API calls. Requires only installation and basic configuration to connect and run SQL statements. +- **Optimized performance** – Improve query processing and connection management for faster response times. +- **Secure authentication** – Use service accounts and industry-standard methods to ensure secure access. + +Firebolt provides multiple drivers and SDKs. Refer to the following [driver documentation](/Guides/integrations/integrations.html) for installation instructions: + +- [Node.js SDK](/Guides/developing-with-firebolt/connecting-with-nodejs.html) – For JavaScript-based applications. +- [Python SDK](/Guides/developing-with-firebolt/connecting-with-Python.html) – For Python-based applications and data workflows. +- [JDBC Driver](/Guides/developing-with-firebolt/connecting-with-jdbc.html) – For Java applications. +- [SQLAlchemy](/Guides/developing-with-firebolt/connecting-with-sqlalchemy.html) – For ORM-based integrations in Python. +- [.NET SDK](/Guides/developing-with-firebolt/connecting-with-net-sdk.html) – For applications running on the .NET framework. +- [Go SDK](/Guides/developing-with-firebolt/connecting-with-go.html) – For applications using the Go programming language. + +## [](#submit-a-query)Submit a query + +After setting up a Firebolt driver, submit a query to verify connectivity and validate your credentials. + +Submitting a query through a Firebolt drivers and SDKs have similar formats. The following code example shows how to submit a query using the [Python SDK](/Guides/developing-with-firebolt/connecting-with-Python.html). 
For other languages, consult the specific driver for details: + +``` +from firebolt.db import connect +from firebolt.client.auth import ClientCredentials + +id = "service_account_id" +secret = "service_account_secret" +engine_name = "your_engine_name" +database_name = "your_test_db" +account_name = "your_account_name" + +firstQuery = """ + SELECT 42; + """ +secondQuery = """ + SELECT 'my second query'; +""" + +with connect( + engine_name=engine_name, + database=database_name, + account_name=account_name, + auth=ClientCredentials(id, secret), +) as connection: + cursor = connection.cursor() + cursor.execute(firstQuery) + for row in cursor.fetchall(): + print(row) + # The cursor can be reused for multiple queries. + cursor.execute(secondQuery) + for row in cursor.fetchall(): + print(row) +``` + +### [](#query-types)Query types + +Firebolt supports two types of query modes: **synchronous** and **asynchronous** queries. + +A [synchronous query](/API-reference/using-sync-queries.html) waits for a response before proceeding. This mode is ideal for interactive queries that require immediate results, such as dashboard queries or user-initiated requests. Firebolt maintains an open HTTP connection for the duration of the query and streams results back as they become available. + +An [asynchronous query](/API-reference/using-async-queries.html) runs in the background, allowing your application to continue executing other tasks. This is useful for long-running queries, such as [INSERT](/sql_reference/commands/data-management/insert.html), or [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html), where waiting for a response is unnecessary. The query status can be checked periodically using a query token. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/api_reference_using_async_queries.md b/cmd/docs-scrapper/fireboltdocs/api_reference_using_async_queries.md new file mode 100644 index 0000000..1fb0c73 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/api_reference_using_async_queries.md @@ -0,0 +1,119 @@ +# [](#asynchronous-queries)Asynchronous queries + +An asynchronous query runs in the background and returns a successful response once it is accepted by the computing cluster, so that a client can proceed with other tasks without waiting for the statement to finish. The status of an asynchronous query can be checked at specified intervals, which provides flexibility, so that you can check the query’s status at meaningful times based on the expected duration of the operation. For example, a user can avoid unnecessary resource consumption by only checking the status periodically, rather than maintaining an open connection for the entire duration of the query, which might be unreliable or unnecessary for certain tasks. + +Asynchronous queries are ideal for long-running SQL statements, such as `INSERT`, `STOP ENGINE`, and `ALTER ENGINE`, where keeping an HTTP connection open is both unreliable and unnecessary, and where the statement might return zero rows. In addition, tracking them can be challenging. Using an asynchronous query allows you to check the status of operations at intervals, based on the expected duration. + +You should use asynchronous queries for any supported operation that may take more than a few minutes for which there are no results. + +**Supported asynchronous queries** + +- [INSERT](/sql_reference/commands/data-management/insert.html) – Inserts one or more values into a specified table. +- [COPY FROM](/sql_reference/commands/data-management/copy-from.html) – Loads data from an Amazon S3 bucket into Firebolt. 
+- [COPY TO](/sql_reference/commands/data-management/copy-to.html) – Copies the result of a `SELECT` query to an Amazon S3 location. +- [VACUUM](/sql_reference/commands/data-management/vacuum.html) – Optimizes tablets for query performance. +- [CREATE AGGREGATING INDEX](/sql_reference/commands/data-definition/create-aggregating-index.html) – Creates an index for precomputing and storing frequent aggregations. +- [CREATE AS SELECT](/sql_reference/commands/data-definition/create-fact-dimension-table-as-select.html) – Creates a table and loads data into it based on a `SELECT` query. +- [Engine commands](/sql_reference/commands/engines/) including [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html), [STOP ENGINE](/sql_reference/commands/engines/stop-engine.html), and [START ENGINE](/sql_reference/commands/engines/start-engine.html). By default, Firebolt engines finish running queries before returning results, which can take significant time. Starting an engine can also take more than a few minutes. + +## [](#how-to-submit-an-asynchronous-query)How to submit an asynchronous query + +You can only submit an asynchronous query programmatically using the Firebolt API or the following listed drivers. Every SQL statement submitted using the Firebolt **Develop Space** user interface is a synchronous query. + +The following are required prerequisites to submit a query programmatically: + +1. **A Firebolt account** – Ensure that you have access to an active Firebolt account. If you don’t have access, you can [sign up for an account](https://www.firebolt.io/sign-up). For more information about how to register with Firebolt, see [Get started with Firebolt](/Guides/getting-started/). +2. **A Firebolt service account** – You must have access to an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html), which facilitates programmatic access to Firebolt. +3.
**A user associated with the Firebolt service account** – You must associate a [user](/Guides/managing-your-organization/managing-users.html#-users) with your service account, and the user must have the necessary permissions to run the query on the specified database using the specified engine. +4. **Sufficient permissions** If you want to query user data through a specific engine, you must have sufficient permissions on the engine, as well as on any tables and databases you access. + +To submit an asynchronous query via a raw HTTP request, you must use Firebolt protocol version 2.3 or later, while query status can be checked with any client. You can verify the protocol version by checking the X-Firebolt-Protocol-Version header in API response. + +## [](#use-a-firebolt-driver)Use a Firebolt Driver + +Use a Firebolt driver to connect to a Firebolt database, authenticate securely, and run SQL statements with minimal setup. The driver provides built-in methods for running SQL statements, handling responses, and managing connections. Only some Firebolt drivers support asynchronous queries. See the documentation for each driver for specific details on how to submit asynchronous queries programmatically: + +- [Python SDK](/Guides/developing-with-firebolt/connecting-with-Python.html) – Firebolt Python SDK +- [Node.js](/Guides/developing-with-firebolt/connecting-with-nodejs.html) – Firebolt Node SDK + +## [](#submit-a-query)Submit a query + +Submitting a query through Firebolt drivers and SDKs follows a similar format. The following code example shows how to submit an asynchronous query using the [Python SDK](/Guides/developing-with-firebolt/connecting-with-Python.html).
For other languages, consult the specific driver for details: + +The following code example establishes a connection to a Firebolt database using a service account, submits an asynchronous `INSERT` statement that groups generated numbers, periodically checks its run status, and then retrieves the row count from the `example` table: + +``` +from time import sleep + +from firebolt.db import connect +from firebolt.client.auth import ClientCredentials + +id = "service_account_id" +secret = "service_account_secret" +engine_name = "your_engine_name" +database_name = "your_test_db" +account_name = "your_account_name" + +query = """ + INSERT INTO example SELECT idMod7 as id + FROM ( + SELECT id%7 as idMod7 + FROM GENERATE_SERIES(1, 10000000000) s(id) + ) + GROUP BY idMod7; + """ + +with connect( + engine_name=engine_name, + database=database_name, + account_name=account_name, + auth=ClientCredentials(id, secret), +) as connection: + cursor = connection.cursor() + + cursor.execute_async(query) # Needs firebolt-sdk 1.9.0 or later + # Token lets us check the status of the query later + token = cursor.async_query_token + print(f"Query Token: {token}") + + # Block until the query is done + # You can also do other work here + while connection.is_async_query_running(token): + print("Checking query status...") + sleep(5) + + status = "Success" if connection.is_async_query_successful(token) else "Failed" + print(f"Query Status: {status}") + + cursor.execute("SELECT count(*) FROM example;") # Should contain 7 rows + for row in cursor.fetchall(): + print(row) +``` + +### [](#check-query-status)Check query status + +The query status token is included in the initial response when the query is submitted. If needed, you can also retrieve the token from the [engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) view.
+ +To check the status of an asynchronous query, use the token with the `CALL fb_GetAsyncStatus` function as follows: + +``` +CALL fb_GetAsyncStatus(''); +``` + +The previous code example returns a single row with the following schema: + +Column Name Data Type Description account\_name TEXT The name of the account where the asynchronous query was submitted. user\_name TEXT The name of the user who submitted the asynchronous query. request\_id TEXT Unique ID of the request which submitted the asynchronous query. query\_id TEXT Unique ID of the asynchronous query. status TEXT Current status of the query: SUSPENDED, RUNNING, CANCELLED, FAILED, SUCCEEDED or IN\_DOUBT. submitted\_time TIMESTAMPTZ The time the asynchronous query was submitted. start\_time TIMESTAMPTZ The time the async query was most recently started. end\_time TIMESTAMPTZ If the asynchronous query is completed, the time it finished. error\_message TEXT If the asynchronous query failed, the error message from the failure. retries LONG The number of times the asynchronous query has retried. scanned\_bytes LONG The number of bytes scanned by the asynchronous query. scanned\_rows LONG The number of rows scanned by the asynchronous query. + +### [](#cancel-a-query)Cancel a query + +A running asynchronous query can be cancelled using the [CANCEL](/sql_reference/commands/queries/cancel.html) statement as follows: + +``` +CANCEL QUERY ''; +``` + +In the previous code example, retrieve the query ID from the [engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) view or from the original query submission response. + +## [](#error-handling)Error handling + +Error Type Cause Solution **Protocol version mismatch** Using an outdated Firebolt protocol version. Make sure your driver supports async queries. **Query failure** The query encounters an execution error. Check the error message in `fb_GetAsyncStatus` and validate the query syntax. 
**Token not found** The provided async query token is invalid or expired. Verify that the correct token is being used and that the query has not expired. **Engine does not exist or you don’t have permission to access it** The specified Firebolt engine is not running or you don’t have permission to access it. Start the engine before submitting the query and double check permissions. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/api_reference_using_sync_queries.md b/cmd/docs-scrapper/fireboltdocs/api_reference_using_sync_queries.md new file mode 100644 index 0000000..181b5f6 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/api_reference_using_sync_queries.md @@ -0,0 +1,88 @@ +# [](#synchronous-queries)Synchronous queries + +Synchronous queries in Firebolt process SQL statements and wait for a response before proceeding with other operations. These queries are best suited for interactive analytics, dashboards, and data retrieval where low-latency performance is essential. Synchronous queries complete within a single request-response cycle. + +Synchronous queries are the default query mode for submitting SQL statements in Firebolt. All statements in the [SQL reference](/sql_reference/) guide can be used inside a synchronous query. + +## [](#how-to-submit-a-synchronous-query)How to submit a synchronous query + +You can submit a synchronous query using the user interface (UI) in the Firebolt **Develop Space**. Every SQL statement submitted using the UI is a synchronous query. For more information about how to submit a SQL statement using the UI, see [Get started using SQL](/Guides/getting-started/get-started-sql.html). + +You can also submit a synchronous query programmatically using the Firebolt API. The following are required prerequisites to submit a query programmatically: + +1. **A Firebolt account** – Ensure that you have access to an active Firebolt account. 
If you don’t have access, you can [sign up for an account](https://www.firebolt.io/sign-up). For more information about how to register with Firebolt, see [Get started with Firebolt](/Guides/getting-started/). +2. **A Firebolt service account** – You must have access to an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html), which facilitates programmatic access to Firebolt. +3. **A user associated with the Firebolt service account** – You must associate a [user](/Guides/managing-your-organization/managing-users.html#-users) with your service account, and the user must have the necessary permissions to run the query on the specified database using the specified engine. +4. **Sufficient permissions** If you want to query user data through a specific engine, you must have sufficient permissions on the engine, as well as on any tables and databases you access. + +To submit a synchronous query programmatically, use a Firebolt Driver to send an HTTP request with the SQL statement to Firebolt’s API endpoint. + +### [](#use-a-firebolt-driver)Use a Firebolt driver + +Use a Firebolt driver to connect to a Firebolt database, authenticate securely, and run SQL statements with minimal setup. The driver provides built-in methods for running SQL statements, handling responses, and managing connections. All Firebolt drivers support synchronous queries.
See the documentation for each driver for specific details on how to submit synchronous queries programmatically: + +- [Node.js SDK](/Guides/developing-with-firebolt/connecting-with-nodejs.html) – Firebolt Node.js SDK +- [Python SDK](/Guides/developing-with-firebolt/connecting-with-Python.html) – Firebolt Python SDK +- [JDBC Driver](/Guides/developing-with-firebolt/connecting-with-jdbc.html) – Firebolt JDBC Driver +- [SQLAlchemy](/Guides/developing-with-firebolt/connecting-with-sqlalchemy.html) – Firebolt SQLAlchemy adapter +- [.NET SDK](/Guides/developing-with-firebolt/connecting-with-net-sdk.html) – Firebolt .NET SDK +- [Go SDK](/Guides/developing-with-firebolt/connecting-with-go.html) – Firebolt Go SDK + +### [](#submit-a-query)Submit a query + +After setting up a Firebolt driver, submit a query to verify connectivity and validate your credentials. + +Submitting a query through a Firebolt drivers and SDKs have similar formats. The following code example shows how to establish a connection to a Firebolt database using a service account’s credentials, runs a simple `SELECT` statement, retrieves and prints the result using the [Python SDK](/Guides/developing-with-firebolt/connecting-with-Python.html). For other languages, consult the specific driver for details. 
+ +``` +from firebolt.db import connect +from firebolt.client.auth import ClientCredentials + +id = "service_account_id" +secret = "service_account_secret" +engine_name = "your_engine_name" +database_name = "your_test_db" +account_name = "your_account_name" + +query = """ + SELECT 42; + """ + +with connect( + engine_name=engine_name, + database=database_name, + account_name=account_name, + auth=ClientCredentials(id, secret), +) as connection: + cursor = connection.cursor() + + cursor.execute(query) + for row in cursor.fetchall(): + print(row) +``` + +#### [](#handling-long-running-synchronous-queries)Handling long-running synchronous queries + +Synchronous queries maintain an open HTTP connection for the duration of the query, and stream results back as they become available. While there is no strict time limit, queries running longer than one hour may experience connectivity interruptions. If the HTTP connection is lost, some SQL statements, including `INSERT`, continue to run by default, while `SELECT` statements are cancelled. You can modify this behavior using the [cancel\_query\_on\_connection\_drop](/Reference/system-settings.html#query-cancellation-mode-on-connection-drop) setting. + +To avoid connection issues, consider submitting long-running queries as [asynchronous](/API-reference/using-async-queries.html) queries. + +#### [](#check-query-status)Check query status + +The queries running on an engine are available in the [engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) view. + +#### [](#cancel-a-query)Cancel a query + +A running synchronous query can be cancelled using the [CANCEL](/sql_reference/commands/queries/cancel.html) statement as follows: + +``` +CANCEL QUERY ''; +``` + +Use the query ID retrieved from the [engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) view to cancel a specific query. 
+ +## [](#error-handling)Error handling + +Common errors and solutions when using synchronous queries: + +Error Type Cause Solution **Connection loss** The HTTP connection is interrupted. Depending on the type of query, the query may still be running. Check [engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) to verify, and use the `cancel_query_on_connection_drop` setting to modify behavior. **Engine does not exist or you don’t have permission to access it** The user lacks required permissions. Ensure the user has `USAGE` permission on the engine and that the engine exists. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/docs.go b/cmd/docs-scrapper/fireboltdocs/docs.go new file mode 100644 index 0000000..351c44e --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/docs.go @@ -0,0 +1,6 @@ +package fireboltdocs + +import "embed" + +//go:embed *.md +var FS embed.FS diff --git a/cmd/docs-scrapper/fireboltdocs/guides.md b/cmd/docs-scrapper/fireboltdocs/guides.md new file mode 100644 index 0000000..4912fde --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides.md @@ -0,0 +1,15 @@ +# [](#guides)Guides + +Learn how to configure, govern, develop and query with Firebolt. 
+ +* * * + +- [Manage organization](/Guides/managing-your-organization/) +- [Get started](/Guides/getting-started/) +- [Operate Engines](/Guides/operate-engines/operate-engines.html) +- [Load data](/Guides/loading-data/loading-data.html) +- [Query data](/Guides/query-data/) +- [Configure security](/Guides/security/) +- [Develop with Firebolt](/Guides/developing-with-firebolt/) +- [Integrate with Firebolt](/Guides/integrations/integrations.html) +- [Export data](/Guides/exporting-data.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt.md b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt.md new file mode 100644 index 0000000..ae6fa95 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt.md @@ -0,0 +1,23 @@ +# [](#developing-with-firebolt)Developing with Firebolt + +Firebolt provides multiple SDKs, drivers, and libraries to integrate with various programming environments, enabling developers to run queries, manage databases, and build data-driven applications efficiently. + +This guide covers how to develop with Firebolt using different languages and frameworks, including: + +- [Node.js](/Guides/developing-with-firebolt/connecting-with-nodejs.html) – Use the Firebolt Node.js SDK to interact with Firebolt databases. +- [Python](/Guides/developing-with-firebolt/connecting-with-Python.html) – Leverage the Firebolt Python SDK for data analysis and automation. +- [JDBC](/Guides/developing-with-firebolt/connecting-with-jdbc.html) – Connect Firebolt to Java-based applications with the JDBC driver. +- [SQLAlchemy](/Guides/developing-with-firebolt/connecting-with-sqlalchemy.html) – Integrate Firebolt with SQLAlchemy for ORM-based workflows. +- [.NET SDK](/Guides/developing-with-firebolt/connecting-with-net-sdk.html) – Work with Firebolt databases using .NET applications. 
+- [Go](/Guides/developing-with-firebolt/connecting-with-go.html) – Access Firebolt from Go applications with the Firebolt Go client. + +Each section provides installation instructions, authentication methods, and query examples tailored to the respective language or framework. + +* * * + +- [Node.js](/Guides/developing-with-firebolt/connecting-with-nodejs.html) +- [Python](/Guides/developing-with-firebolt/connecting-with-Python.html) +- [JDBC](/Guides/developing-with-firebolt/connecting-with-jdbc.html) +- [SQLAlchemy](/Guides/developing-with-firebolt/connecting-with-sqlalchemy.html) +- [.NET SDK](/Guides/developing-with-firebolt/connecting-with-net-sdk.html) +- [Go](/Guides/developing-with-firebolt/connecting-with-go.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_go.md b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_go.md new file mode 100644 index 0000000..370c32b --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_go.md @@ -0,0 +1,195 @@ +# [](#firebolt-go-sdk-documentation)Firebolt Go SDK Documentation + +## [](#overview)Overview + +The Firebolt Go SDK is an implementation of Go’s `database/sql/driver` interface, enabling Go developers to connect to and interact with Firebolt databases seamlessly. + +## [](#prerequisites)Prerequisites + +You must have the following prerequisites before you can connect your Firebolt account to Go: + +- **Go installed and configured** on your system. The minimum supported version is 1.18 or higher. If you do not have Go installed, you can download the [latest version](https://go.dev/dl/). After installing, if you don’t have a Go module yet, you’ll need to initialize one. See the [Go documentation on modules](https://go.dev/doc/tutorial/create-module) for detailed instructions on how to create and initialize a Go module. 
+- **Firebolt account** – You need an active Firebolt account. If you do not have one, you can [sign up](https://go.firebolt.io/signup) for one. +- **Firebolt service account** – You must have access to an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html), which facilitates programmatic access to Firebolt, along with its ID and secret. +- **Firebolt user** – You must have a user that is [associated](/Guides/managing-your-organization/service-accounts.html#create-a-user) with your service account. The user should have [USAGE](/Overview/Security/Role-Based%20Access%20Control/database-permissions/) permission to query your database, and [OPERATE](/Overview/Security/Role-Based%20Access%20Control/engine-permissions.html) permission to start and stop an engine if it is not already started. +- **Firebolt database and engine (optional)** – You can optionally connect to a Firebolt database and/or engine. If you do not have one yet, you can [create a database](/Guides/getting-started/get-started-sql.html#create-a-database) and also [create an engine](/Guides/getting-started/get-started-sql.html#create-an-engine). You would need a database if you want to access stored data in Firebolt and an engine if you want to load and query stored data. + +## [](#installation)Installation + +To install the Firebolt Go SDK, run the following `go get` command from inside your Go module: + +``` +go get github.com/firebolt-db/firebolt-go-sdk +``` + +## [](#dsn-parameters)DSN Parameters + +Go passes a data source name (DSN) to Firebolt’s Go SDK to connect to Firebolt. The SDK parses the DSN string for parameters to authenticate and connect to a Firebolt account, database, and engine. + +The DSN string supports the following parameters: + +- `client_id`: client ID of your [service account](/Guides/managing-your-organization/service-accounts.html). +- `client_secret`: client secret of your [service account](/Guides/managing-your-organization/service-accounts.html). 
+- `account_name`: The name of your Firebolt [account](/Guides/managing-your-organization/managing-accounts.html). +- `database`: (Optional) The name of the [database](/Overview/Security/Role-Based%20Access%20Control/database-permissions/) to connect to. +- `engine`: (Optional) The name of the [engine](/Overview/Security/Role-Based%20Access%20Control/engine-permissions.html) to run SQL queries on. + +The following is an example DSN string: + +``` +firebolt://[/]?account_name=&client_id=&client_secret=&engine= +``` + +## [](#connect-to-firebolt)Connect to Firebolt + +To establish a connection to a Firebolt database, construct a DSN string with your credentials and database details. The following example contains a script to connect to Firebolt that you can place in a file (e.g., `main.go`) and run using `go run main.go` inside your Go module: + +``` +package main + +import ( + "database/sql" + "fmt" + "log" + // Import the Firebolt Go SDK + _ "github.com/firebolt-db/firebolt-go-sdk" +) + +func main() { + // Replace with your Firebolt credentials and database details + clientId := "your_client_id" + clientSecret := "your_client_secret" + accountName := "your_account_name" + databaseName := "your_database_name" // Optional parameter + engineName := "your_engine_name" // Optional parameter + dsn := fmt.Sprintf("firebolt:///%s?account_name=%s&client_id=%s&client_secret=%s&engine=%s", databaseName, accountName, clientId, clientSecret, engineName) + + // Open a connection to the Firebolt database + db, err := sql.Open("firebolt", dsn) + if err != nil { + log.Fatalf("Error opening database connection: %v\n", err) + return + } + defer db.Close() + + // Your database operations go here +} +``` + +## [](#run-queries)Run queries + +Once connected, you can run SQL queries. The following examples show you how to create a table, insert data, and retrieve data. 
You can place them inside the previous script under `// Your database operations go here`: + +``` +// Create a table +_, err = db.Exec("CREATE TABLE IF NOT EXISTS test_table (id INT, value TEXT)") +if err != nil { + log.Fatalf("Error creating table: %v\n", err) + return +} + +// Insert data into the table +_, err = db.Exec("INSERT INTO test_table (id, value) VALUES (?, ?)", 1, "sample value") +if err != nil { + log.Fatalf("Error inserting data: %v\n", err) + return +} + +// Query data from the table +rows, err := db.Query("SELECT id, value FROM test_table") +if err != nil { + log.Fatalf("Error querying data: %v\n", err) + return +} +defer rows.Close() + +// Iterate over the result set +for rows.Next() { + var id int + var value string + if err := rows.Scan(&id, &value); err != nil { + log.Fatalf("Error scanning row: %v\n", err) + return + } + log.Printf("Row: id=%d, value=%s\n", id, value) +} +``` + +## [](#streaming-queries)Streaming Queries + +Firebolt supports streaming large query results using `rows.Next()`, allowing efficient processing of large datasets. + +If you enable result streaming, the query execution might finish successfully, but the actual error might be returned while iterating the rows. + +To enable streaming, use the `firebolt-go-sdk/context` package to create a context with streaming enabled: + +``` +package main + +import ( + "context" + "database/sql" + "log" + + _ "github.com/firebolt-db/firebolt-go-sdk" + fireboltContext "github.com/firebolt-db/firebolt-go-sdk/context" + +) + +func main() { + dsn := "firebolt:///your_database_name?account_name=your_account_name&client_id=your_client_id&client_secret=your_client_secret" + db, err := sql.Open("firebolt", dsn) + if err != nil { + log.Fatalf("Failed to open database: %v", err) + } + defer db.Close() + + streamingCtx := fireboltContext.WithStreaming(context.Background()) + + // Execute a query with streaming enabled. 
 Imitate large query result + rows, err := db.QueryContext(streamingCtx, "SELECT 123, 'data' FROM generate_series(1, 100000000)") + if err != nil { + log.Fatalf("Query execution failed: %v", err) + } + defer rows.Close() + + for rows.Next() { + var col1 int + var col2 string + if err := rows.Scan(&col1, &col2); err != nil { + log.Fatalf("Error scanning row: %v", err) + } + log.Printf("Row: col1=%d, col2=%s\n", col1, col2) + } + if err := rows.Err(); err != nil { + log.Fatalf("Row iteration error: %v", err) + } +} +``` + +Streaming queries are particularly useful when dealing with large datasets, as they avoid loading the entire result set into memory at once. + +## [](#troubleshooting)Troubleshooting + +When building a DSN to connect with Firebolt using the Go SDK, follow these best practices to ensure correct connection string formatting and avoid parsing errors. The DSN must follow this structure: + +``` +firebolt:///?account_name=&client_id=&client_secret=&engine= +``` + +**Guidelines** + +- Place the database name in the URI path after `firebolt:///`. +- Use only letters, numbers, and underscores (\_) in the database name. Avoid hyphens (-), as they may cause parsing errors. +- Ensure the `account_name` matches the name shown in the Firebolt Console URL, which is usually lowercase with no special characters. +- Use the exact engine name as shown in the Firebolt Workspace. +- Do not pass the database name as a query parameter. The SDK does not support `&database=` in the DSN. + +### [](#common-errors-and-solutions)Common errors and solutions + +Error message Likely cause Solution `invalid connection string format` URI format is invalid or it contains illegal characters (like `-`) Double check the URI format and remove illegal characters. `unknown parameter name database` Attempted to pass `database` as a query parameter. Move the database name into the URI path. `error opening database connection` Incorrect connection credentials. 
Verify connection parameters values in the Firebolt UI and use exact values. + +## [](#additional-resources)Additional Resources + +- [Firebolt Go SDK GitHub Repository](https://github.com/firebolt-db/firebolt-go-sdk) +- [Firebolt Documentation: Connecting with Go](/Guides/developing-with-firebolt/connecting-with-go.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_jdbc.md b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_jdbc.md new file mode 100644 index 0000000..9a54077 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_jdbc.md @@ -0,0 +1,183 @@ +# [](#jdbc-driver)JDBC driver + +Firebolt’s [type 4](https://en.wikipedia.org/wiki/JDBC_driver#Type_4_driver_%E2%80%93_Database-Protocol_driver/Thin_Driver%28Pure_Java_driver%29) JDBC driver lets Java applications connect to Firebolt. The JDBC driver is open-source software released under an Apache 2 license. You can browse, fork, download, and contribute to its development on [GitHub](https://github.com/firebolt-db/jdbc). 
+ +- [Download the JAR file](#download-the-jar-file) +- [Adding the Firebolt JDBC driver as a Maven dependency](#adding-the-firebolt-jdbc-driver-as-a-maven-dependency) +- [Adding the Firebolt JDBC driver as a Gradle dependency](#adding-the-firebolt-jdbc-driver-as-a-gradle-dependency) +- [Connecting to Firebolt with the JDBC driver](#connecting-to-firebolt-with-the-jdbc-driver) +- [Authentication](#authentication) + + - [Available connection parameters](#available-connection-parameters) + - [System settings as connection parameters](#system-settings-as-connection-parameters) +- [Applying system settings using SET](#applying-system-settings-using-set) +- [Connection validation](#connection-validation) +- [Full reference documentation](#full-reference-documentation) + +## [](#download-the-jar-file)Download the JAR file + +The Firebolt JDBC driver is provided as a JAR file and requires [Java 11](https://java.com/en/download/manual.jsp) or later. + +Download the driver from [GitHub JDBC releases](https://github.com/firebolt-db/jdbc/releases). + +## [](#adding-the-firebolt-jdbc-driver-as-a-maven-dependency)Adding the Firebolt JDBC driver as a Maven dependency + +To connect your project to Firebolt using [Apache Maven](https://maven.apache.org/), add the Firebolt JDBC driver as a dependency in your **pom.xml** configuration file. Link to the [Firebolt Maven repository](https://central.sonatype.com/artifact/io.firebolt/firebolt-jdbc), so that Maven can download and include the JDBC driver in your project, as shown in the following code example: + +``` + + + + + io.firebolt + firebolt-jdbc + 3.3.0 + + +``` + +In the previous code example, replace `3.3.0` with the latest version available in the [Firebolt Maven Central repository](https://central.sonatype.com/artifact/io.firebolt/firebolt-jdbc). 
+ +## [](#adding-the-firebolt-jdbc-driver-as-a-gradle-dependency)Adding the Firebolt JDBC driver as a Gradle dependency + +If you are using the [Gradle Build Tool](https://gradle.org/), you can configure your Gradle project to use the Firebolt JDBC driver by specifying Apache’s [Maven Central](https://maven.apache.org/repository/index.html) as a repository and adding the Firebolt JDBC driver as a dependency as follows: + +``` +/* build.gradle */ + +repositories { + mavenCentral() +} + +dependencies { + implementation 'io.firebolt:firebolt-jdbc:3.3.0' +} +``` + +In the previous code example, replace `3.3.0` with the latest version available in the [Firebolt Maven Central repository](https://central.sonatype.com/artifact/io.firebolt/firebolt-jdbc). + +## [](#connecting-to-firebolt-with-the-jdbc-driver)Connecting to Firebolt with the JDBC driver + +Provide connection details to the Firebolt JDBC driver using a connection string in the following format: + +``` +jdbc:firebolt:? +``` + +In the previous connection example, the following apply: + +- `` - Specifies the name of the Firebolt database to connect to. +- `` - A list of connection parameters formatted as a standard [URL query string](https://en.wikipedia.org/wiki/Query_string#Structure). + +## [](#authentication)Authentication + +To authenticate, use a [service account ID and secret](/Guides/managing-your-organization/service-accounts.html). A service account, which is used for programmatic access to Firebolt, uses a `client_id` and a `client_secret` for identification. To ensure compatibility with tools external to Firebolt, you can specify the service account’s `client_id` as `user` and `client_secret` as `password`. 
+ +The following are examples of how to specify connection strings for authentication and configuration: + +**Example** + +The following example connection string configures the Firebolt JDBC driver to connect to `my_database` using a specified `client_id` and `client_secret` for authentication: + +``` + jdbc:firebolt:my_database?client_id=&client_secret=&account=my_account&engine=my_engine&buffer_size=1000000&connection_timeout_millis=10000 +``` + +The previous example string also specifies an account name `my_account`, an engine name `my_engine`, a buffer size of `1000000` bytes, and a connection timeout of `10000` milliseconds, or `10` seconds. + +**Example** + +The following example provides `client_id` and `client_secret` as separate properties, rather than embedding them directly in the connection string, as shown in the previous example. + +Connection string: + +``` + jdbc:firebolt:my_database?account=my_account&engine=my_engine&buffer_size=1000000&connection_timeout_millis=10000 +``` + +Connection properties: + +``` + client_id= + client_secret= +``` + +**Example** + +The following example connects to `my_database` using only connection properties for authentication and parameters, without including any parameters directly in the string. 
+ +Connection string: + +``` + jdbc:firebolt:my_database +``` + +Connection properties: + +``` + client_id= + client_secret= + account=my_account + engine=my_engine + buffer_size=1000000 + connection_timeout_millis=10000 +``` + +**Example** + +The following example is a minimal URL that connects to `my_database` using `client_id` and `client_secret` as connection properties for authentication, omitting the engine name and therefore connects to default engine and relying on default values for all other parameters: + +Connection string: + +``` + jdbc:firebolt:my_database +``` + +Connection properties: + +``` + client_id= + client_secret= + account=my_account +``` + +Because the previous configuration example omits specifying the engine name, `my_database` connects to the default engine. + +Since the connection string is a URI, make sure to [percent-encode](https://en.wikipedia.org/wiki/Percent-encoding) any reserved characters or special characters used in parameter keys or parameter values. + +### [](#available-connection-parameters)Available connection parameters + +The following table lists the available parameters that can be added to a Firebolt JDBC connection string. All parameter keys are case-sensitive. + +Parameter key Data type Default value Range Description client\_id TEXT No default value.   (**Required**) The Firebolt service account ID. client\_secret TEXT No default value.   (**Required**) The secret generated for the Firebolt service account. account TEXT No default value.   (**Required**) Your Firebolt account name. database TEXT No default value.   The name of the database to connect to. Takes precedence over the database name provided as a path parameter. engine TEXT The default engine attached to the specified database.   The name of the engine to connect to. buffer\_size INTEGER `65536` `1` to `2147483647` The buffer size, in bytes, that the driver uses to read the responses from the Firebolt API. 
connection\_timeout\_millis INTEGER `60000` `0` to `2147483647` The wait time in milliseconds before a connection to the server is considered failed. A timeout value of zero means that the connection will wait indefinitely. max\_connections\_total INTEGER `300` `1` to `2147483647` The maximum total number of connections. socket\_timeout\_millis INTEGER `0` `0` to `2147483647` The socket timeout, in milliseconds, which specifies the maximum wait time for data, defining the longest allowed inactivity between consecutive data packets. A value of zero means that there is no timeout limit. connection\_keep\_alive\_timeout\_millis INTEGER `300000` `1` to `2147483647` Defines the duration to keep a server connection open in the connection pool before it is closed. ssl\_mode TEXT `strict` `strict` or `none` When set to `strict`, the SSL or TLS certificate is validated for accuracy and authenticity. If set to `none`, certificate verification is omitted. ssl\_certificate\_path TEXT No default value.   The absolute file path for the SSL root certificate. validate\_on\_system\_engine BOOLEAN `FALSE` `TRUE` or `FALSE` When set to `TRUE`, the connection is always validated against a system engine, even if it’s connected to a regular engine. For more information, see [Connection validation](#connection-validation). + +### [](#system-settings-as-connection-parameters)System settings as connection parameters + +In addition to the parameters specified in the previous table, any [system setting](/Reference/system-settings.html) can be passed as a connection string parameter. For example, to set a custom time zone, use the following format: + +``` +jdbc:firebolt:my_database?time_zone=UTC& +``` + +## [](#applying-system-settings-using-set)Applying system settings using SET + +In addition to passing system settings as connection string parameters, any [system setting](/Reference/system-settings.html) can be passed using the SQL `SET` command. 
Multiple `SET` statements can be run consecutively, separated by semicolons, as shown below: + +``` +SET time_zone = 'UTC'; +SET standard_conforming_strings = false; +``` + +## [](#connection-validation)Connection validation + +The Firebolt JDBC driver validates the connection by sending a `SELECT 1` query to the system engine. If this query fails, the driver throws an exception. You can use the `validate_on_system_engine` parameter to customize validation. When it is set to `true`, the validation query is sent to the system engine, even if the connection is established with a regular engine. This feature can be useful if you want to stop the regular engine but still need to validate the connection. + +The following example configures the Firebolt JDBC driver to connect to `my_database` and validate the connection using the system engine with additional connection parameters specified in `other_connection_parameters`: + +``` +jdbc:firebolt:my_database?validate_on_system_engine=true& +``` + +## [](#full-reference-documentation)Full reference documentation + +The complete documentation for classes and methods in the Firebolt JDBC driver is available in the [Firebolt JDBC API reference guide](https://jdbc.docs.firebolt.io/javadoc/). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_net_sdk.md b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_net_sdk.md new file mode 100644 index 0000000..f02ea9d --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_net_sdk.md @@ -0,0 +1,96 @@ +# [](#firebolt-net-sdk)Firebolt .NET SDK + +## [](#overview)Overview + +The Firebolt .NET SDK is a software development kit designed to facilitate the integration of Firebolt’s high-performance database capabilities into .NET applications. 
This SDK provides developers with the tools and interfaces needed to interact with Firebolt databases efficiently, enabling effective data manipulation and query execution. + +## [](#installation)Installation + +Install the Firebolt .NET SDK by adding the NuGet package to your project. You can do this in several ways: + +### [](#via-package-manager-console)Via Package Manager Console + +``` +Install-Package FireboltNetSdk +``` + +### [](#via-net-cli)Via .NET CLI + +``` +dotnet add package FireboltNetSdk +``` + +### [](#via-packagereference)Via PackageReference + +Add the following line to your project file: + +``` + +``` + +Make sure to replace `x.x.x` with the specific version you want to use. + +### [](#via-visual-studio-ui)Via **Visual Studio UI** + +`Tools` > `NuGet Package Manager` > `Manage NuGet Packages for Solution` and search for `Firebolt` + +For more details and versioning information, please visit the [NuGet Gallery](https://www.nuget.org/packages/FireboltNetSdk/). + +## [](#quick-start)Quick Start + +Here’s a simple example to get started with the Firebolt .NET SDK: + +``` +using System.Data.Common; +using FireboltDotNetSdk.Client; + +public class Program +{ + public static async Task Main(string[] args) + { + // Name of your Firebolt account + string account = "my_firebolt_account"; + // Client credentials, that you want to use to connect + string clientId = "my_client_id"; + string clientSecret = "my_client_secret"; + // Name of database and engine to connect to (Optional) + string database = "my_database_name"; + string engine = "my_engine_name"; + + // Construct a connection string using defined parameter + string conn_string = $"account={account};clientid={clientId};clientsecret={clientSecret};database={database};engine={engine}"; + + // Create a new connection using generated connection string + using var conn = new FireboltConnection(conn_string); + // Open a connection + conn.Open(); + + // First you would need to create a command + var 
command = conn.CreateCommand(); + + // ... and set the SQL query + command.CommandText = "SELECT * FROM my_table"; + + // Execute a SQL query and get a DB reader + DbDataReader reader = command.ExecuteReader(); + + // Optionally you can check whether the result set has rows + Console.WriteLine($"Has rows: {reader.HasRows}"); + + // Close the connection after all operations are done + conn.Close(); + } +} +``` + +## [](#documentation)Documentation + +For more detailed documentation, including API references and advanced usage, please refer to the [README](https://github.com/firebolt-db/firebolt-net-sdk/blob/main/README.md) file in the repository. + +## [](#support)Support + +For support, issues, or contributions, please refer to the repository’s issue tracker and contributing guidelines. + +## [](#license)License + +This SDK is released under **Apache License 2.0**. Please see the [LICENSE](https://github.com/firebolt-db/firebolt-net-sdk/blob/main/LICENSE) file for more details. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_nodejs.md b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_nodejs.md new file mode 100644 index 0000000..de5322c --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_nodejs.md @@ -0,0 +1,132 @@ +# [](#nodejs)Node.js + +- [Overview](#overview) +- [Installation](#installation) +- [Authentication](#authentication) +- [Quick start](#quick-start) +- [Contribution](#contribution) +- [License](#license) + +## [](#overview)Overview + +The Firebolt Node SDK is a software development kit designed to facilitate the integration of Firebolt’s high-performance database capabilities into Node.js applications. This SDK provides a set of tools and interfaces for developers to interact with Firebolt databases, enabling efficient data manipulation and query execution. 
For more detailed documentation, including API references and advanced usage, refer to the [README](https://github.com/firebolt-db/firebolt-node-sdk/blob/main/README.md) file in the Firebolt Node SDK repository. + +## [](#installation)Installation + +To install the Firebolt Node SDK, run the following command in your project directory: + +``` +npm install firebolt-sdk +``` + +## [](#authentication)Authentication + +After installation, you must authenticate before you can use the SDK to establish connections, run queries, and manage database resources. The following code example sets up a connection using your Firebolt [service account](/Guides/managing-your-organization/service-accounts.html) credentials: + +``` +const connection = await firebolt.connect({ + auth: { + client_id: '12345678-90123-4567-8901-234567890123', + client_secret: 'secret', + }, + engineName: 'engine_name', + account: 'account_name', + database: 'database', +}); +``` + +In the previous code example, the following details apply: + +- `client_id` and `client_secret`: These are your service account credentials. Refer to Firebolt’s guide to learn how to [create a service account](/Guides/managing-your-organization/service-accounts.html#create-a-service-account) and obtain its [ID](/Guides/managing-your-organization/service-accounts.html#get-a-service-account-id) and [secret](/Guides/managing-your-organization/service-accounts.html#generate-a-secret). +- `engineName`: The name of the engine used to run your queries on. +- `database`: The target database where your tables will be stored. +- `account`: The object within your organization that encapsulates resources for storing, querying, and managing data. In the Node.js SDK, the [account](/Overview/organizations-accounts.html#accounts) parameter specifies which organizational environment the connection will use. + +## [](#quick-start)Quick start + +In the following code example, credentials are stored in environment variables. 
+ +``` +import { Firebolt } from 'firebolt-sdk' + +// Initialize client +const firebolt = Firebolt(); + +// Establish connection to Firebolt using environment variables for credentials and configuration +const connection = await firebolt.connect({ + auth: { + client_id: process.env.FIREBOLT_CLIENT_ID, + client_secret: process.env.FIREBOLT_CLIENT_SECRET, + }, + account: process.env.FIREBOLT_ACCOUNT, + database: process.env.FIREBOLT_DATABASE, + engineName: process.env.FIREBOLT_ENGINE_NAME +}); + +// Create a "users" table +await connection.execute(` + CREATE TABLE IF NOT EXISTS users ( + id INT, + name STRING, + age INT + ) +`); + +// Insert sample data +await connection.execute(` + INSERT INTO users (id, name, age) VALUES + (1, 'Alice', 30), + (2, 'Bob', 25) +`); + +// Update rows +await connection.execute(` + UPDATE users SET age = 31 WHERE id = 1 +`); + +// Fetch data with a query +const statement = await connection.execute("SELECT * FROM users"); + +// Fetch the complete result set +const { data, meta } = await statement.fetchResult(); + +// Log metadata describing the columns of the result set +console.log(meta) +// Outputs: +// [ +// Meta { type: 'int null', name: 'id' }, +// Meta { type: 'text null', name: 'name' }, +// Meta { type: 'int null', name: 'age' } +// ] + +// Alternatively, stream the result set row by row +const { data } = await statement.streamResult(); + +data.on("metadata", metadata => { + console.log(metadata); +}); + +// Handle metadata event +data.on("error", error => { + console.log(error); +}); + +const rows = [] + +for await (const row of data) { + rows.push(row); +} + +// Log the collected rows +console.log(rows) +// Outputs: +// [ [ 1, 'Alice', 31 ], [ 2, 'Bob', 25 ] ] +``` + +## [](#contribution)Contribution + +To receive support, report issues, or contribute, please refer to the Firebolt Node SDK repository [issue tracker](https://github.com/firebolt-db/firebolt-node-sdk/issues). 
+ +## [](#license)License + +This SDK is released under **Apache License 2.0**. See the [LICENSE](https://github.com/firebolt-db/firebolt-node-sdk/blob/main/LICENSE) file for more details. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_python.md b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_python.md new file mode 100644 index 0000000..873bfcb --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_python.md @@ -0,0 +1,7 @@ +# [](#python)Python + +You can use the Python SDK to work with Firebolt. See the resources below for more information. + +- [Firebolt Python SDK documentation](https://python.docs.firebolt.io/sdk_documenation/latest/) +- The [firebolt-python-sdk repository on GitHub](https://github.com/firebolt-db/firebolt-python-sdk/) +- Code examples (in Jupyter notebooks) in the SDK repository that demonstrate common [data tasks](https://github.com/firebolt-db/firebolt-python-sdk/blob/main/examples/dbapi.ipynb) and [management tasks](https://github.com/firebolt-db/firebolt-python-sdk/blob/main/examples/management.ipynb) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_sqlalchemy.md b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_sqlalchemy.md new file mode 100644 index 0000000..71ccf26 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_developing_with_firebolt_connecting_with_sqlalchemy.md @@ -0,0 +1,11 @@ +# [](#connect-with-sqlalchemy)Connect with SQLAlchemy + +SQLAlchemy is an open-source SQL toolkit and object-relational mapper for the Python programming language. + +Firebolt’s adapter for SQLAlchemy acts as an interface for other supported third-party applications including Superset and Preset. 
When the SQLAlchemy adapter is successfully connected, these applications are able to communicate with Firebolt databases through the REST API. + +The adapter is written in Python using the SQLAlchemy toolkit. + +### [](#get-started)Get started + +Follow the guidelines for SQLAlchemy integration in the Firebolt-SQLAlchemy [Github repository](https://github.com/firebolt-db/firebolt-sqlalchemy/). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_exporting_data.md b/cmd/docs-scrapper/fireboltdocs/guides_exporting_data.md new file mode 100644 index 0000000..3a065f3 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_exporting_data.md @@ -0,0 +1,90 @@ +# [](#export-data)Export data + +You can export data from a `SELECT` query directly to an Amazon S3 location using [COPY TO](/sql_reference/commands/data-management/copy-to.html). This method is more flexible and efficient than downloading query results manually from the **Firebolt Workspace**, making it ideal for data sharing, integration, and archival. + +## [](#how-to-export-data)How to export data + +The following code example uses `COPY TO` to export the result of a `SELECT` query from `my_table` to a specified Amazon S3 bucket in CSV format using the provided [AWS credentials](/sql_reference/commands/data-management/copy-to.html#credentials): + +``` +COPY ( + SELECT column1, column2 FROM my_table WHERE condition +) +TO 's3://your-bucket/path/' +WITH (FORMAT = 'CSV') +CREDENTIALS = ('aws_key_id'='your-key' 'aws_secret_key'='your-secret'); +``` + +## [](#choose-the-right-export-format)Choose the right export format + +Format Best For Characteristics Recommended Use **CSV (Comma-Separated)** General data exchange, spreadsheets, SQL. Simple, widely supported, and easy to read. Best for spreadsheets, databases, or general data exchange. **TSV (Tab-Separated)** Structured text data. Like CSV, but uses tabs instead of commas. Best for Excel, databases, or general data exchange. 
**JSON** APIs, web applications, NoSQL databases. Flexible, human-readable, and supports nested data. Best for web apps, APIs, or NoSQL integrations. **PARQUET** Big data processing, analytics workloads. Compressed, columnar, and optimized for querying. Ideal for analytics, performance-sensitive workloads, and large datasets. + +## [](#examples)Examples + +**Export data in CSV format** + +Use CSV when you need a simple, widely supported format for spreadsheets, relational databases, or data exchange. + +The following code example exports `user_id`, `event_type`, and `timestamp` data and headers from the `user_events` table to a CSV file in an Amazon S3 bucket: + +``` +COPY (SELECT user_id, event_type, timestamp FROM user_events) +TO 's3://my-export-bucket/user_events.csv' +WITH (FORMAT = 'CSV', HEADER = TRUE) +CREDENTIALS = ('aws_key_id'='your-key' 'aws_secret_key'='your-secret'); +``` + +**Export data in Parquet format** + +Parquet is best for big data workloads, as it offers compressed, columnar storage optimized for analytics and query performance. + +The following code example exports all data from the `sales_data` table to an Amazon S3 bucket in Parquet format using the provided AWS credentials: + +``` +COPY (SELECT * FROM sales_data) +TO 's3://my-export-bucket/sales_data.parquet' +WITH (FORMAT = 'PARQUET') +CREDENTIALS = ('aws_key_id'='your-key' 'aws_secret_key'='your-secret'); +``` + +**Export data in JSON format** + +JSON is ideal for APIs, web applications, and NoSQL databases, as it supports nested and flexible data structures. 
+ + +The following code example exports `order_id` and `order_details` from the `orders` table to an Amazon S3 bucket in JSON format using the provided AWS credentials: + +``` +COPY (SELECT order_id, order_details FROM orders) +TO 's3://my-export-bucket/orders.json' +WITH (FORMAT = 'JSON') +CREDENTIALS = ('aws_key_id'='your-key' 'aws_secret_key'='your-secret'); +``` + +**Export data in TSV format** + +TSV is similar to CSV but uses tab delimiters, making it useful for structured text data that may contain commas. + +The following code example exports `name`, `age`, and `city` from the `customers` table to an Amazon S3 bucket in TSV format using the provided AWS credentials: + +``` +COPY (SELECT name, age, city FROM customers) +TO 's3://my-export-bucket/customers.tsv' +WITH (FORMAT = 'TSV') +CREDENTIALS = ('aws_key_id'='your-key' 'aws_secret_key'='your-secret'); +``` + +## [](#additional-considerations)Additional Considerations + +**Performance tips** + +- Export only required columns and use filters to reduce data volume +- Ensure proper permissions are set on your S3 bucket + +**Security and credentials** + +- Always use **secure AWS credentials**. +- Use **IAM roles** instead of setting credentials directly in the code for better security. + +## [](#next-steps)Next Steps + +For more information about advanced options including **compression**, **partitioning**, and **null handling**, see [COPY TO](/sql_reference/commands/data-management/copy-to.html). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_getting_started.md b/cmd/docs-scrapper/fireboltdocs/guides_getting_started.md new file mode 100644 index 0000000..202b86e --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_getting_started.md @@ -0,0 +1,16 @@ +# [](#get-started-with-firebolt)Get started with Firebolt + +Welcome to the beginning of your journey with Firebolt! 
This tutorial guides you through all of the steps you need to run a basic workflow which includes setting up your Firebolt account, creating a database and engine, importing a sample dataset, creating indexes, and running a query. If you encounter any issues, reach out to [support@firebolt.io](mailto:support@firebolt.io) for help. + +To get started, you must [register](https://go.firebolt.io/signup) and create a Firebolt account. Then, you can either use the **Develop Space** inside the **Firebolt Workspace**, or use the **Load data** wizard to create a database and engine, and load data. Then, you can run your first query to obtain baseline performance statistics. Next, you can tune your workflow using Firebolt’s optimization strategies to reduce query run times. You can set a primary index and use aggregating indexes to speed up your query times significantly. Lastly, you can export your data to an external table. These steps are illustrated in the following workflow: + +![Get Started](../../assets/images/architecture-workflow.png) + +After you register, you can either use the [Load data wizard](/Guides/getting-started/get-started-load-data-wizard.html) or [use SQL](/Guides/getting-started/get-started-sql.html). Use the Load data wizard if your data is in either CSV or Parquet format, and you want to use a graphical user interface to guide you through the first three steps of the workflow. Use the Firebolt **Develop Space** or an API if you prefer to enter SQL, or need a more customized workflow. 
+ +## [](#next-steps)Next steps + +Choose either of the following: + +- [Get started using a wizard](/Guides/getting-started/get-started-load-data-wizard.html) +- [Get started using SQL](/Guides/getting-started/get-started-sql.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_load_data_wizard.md b/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_load_data_wizard.md new file mode 100644 index 0000000..0a0cf68 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_load_data_wizard.md @@ -0,0 +1,48 @@ +# [](#get-started-using-a-wizard)Get started using a wizard + +The **Load data** wizard guides you through creating a database and engine, and loading data from an Amazon S3 bucket. You can specify basic configurations, including what character to use as a file delimiter, which columns to import and their schema. After loading your data, continue working in the **Develop Space** to run and optimize a query, and export to an external table, as shown in the following diagram: + +![A simple workflow using the load data wizard starts with registering, using the wizard, running a query, optimizing your workflow, and cleaning up. ](../../assets/images/get_started_wizard_workflow.png) + +## [](#register-with-firebolt)Register with Firebolt + +![The first step in getting started is to register with Firebolt.](../../assets/images/get_started_wizard_register.png) + +Use the following steps to register with Firebolt: + +1. [Sign up](https://go.firebolt.io/signup) on Firebolt’s registration page. Fill in your email, name, choose a password, and select **Get Started**. +2. Firebolt will send a confirmation to the address that you provided. To complete your registration, select **Verify** in the email to take you to Firebolt’s [login page](https://go.firebolt.io/login). +3. Type in your email and password and select **Log In**. 
+ +New accounts receive credits ($200) to get started exploring Firebolt’s capabilities. Credits must be used within 30 days of account creation. + +Firebolt’s billing is based on engine runtime, measured in seconds. AWS S3 storage costs are passed through at the rate of $23 per TB per month. Your cost depends primarily on which engines you use and how long those engines are running. + +You can view your total cost in FBU up to the latest second and in $USD up to the latest day. For more information, see the following **Create a Database** section. For more information about costs, see [Data Warehouse Pricing](https://www.firebolt.io/pricing). If you need to buy additional credits, connect Firebolt with your AWS Marketplace account. For more information about AWS Marketplace, see the following section: [Registering through AWS Marketplace section](/Guides/getting-started/get-started-next.html#register-through-the-aws-marketplace). + +## [](#use-the-load-data-wizard)Use the Load data wizard + +![After registering, use the load data wizard to create a database, engine, and load data.](../../assets/images/get_started_wizard_wizard.png) + +You can use the **Load data** wizard to load data in either CSV or Parquet form. + +To start the **Load data** wizard, select the plus (+) icon in the **Develop Space** next to **Databases** in the left navigation pane and select **Load data**. The wizard will guide you through creating a database, an engine, and loading data. See [Load data using a wizard](/Guides/loading-data/loading-data-wizard.html#load-data-using-a-wizard) for detailed information about the workflow and the available options in the wizard. 
+ +Even though the **Load data** wizard creates a database and engine for you, the [**Create a Database**](/Guides/getting-started/get-started-sql.html#create-a-database) and [**Create an Engine**](/Guides/getting-started/get-started-sql.html#create-an-engine) sections in the [Use SQL to load data](/Guides/getting-started/get-started-sql.html) guide contain useful information about billing for engine runtime and schema. + +To use the **Load data** wizard, select the plus (+) icon. For detailed information about how to use the **Load data** wizard, see the [Load data](/Guides/loading-data/loading-data.html) guide. + +## [](#run-query-optimize-clean-up-and-export)Run query, optimize, clean up, and export + +![After using the load data wizard, a simple workflow continues with running a query, optimization, cleaning up, and optionally exporting a dataset.](../../assets/images/get_started_wizard_next.png) + +After you have loaded your data in the wizard, the rest of the steps in getting started are the same as if you ran your workflow in SQL. You can use either the **Develop Space** in the **Firebolt Workspace** to enter SQL, or use the [Firebolt API](/Guides/query-data/using-the-api.html). + +- For information about how to get started running a query, see [Run query](/Guides/getting-started/get-started-sql.html#run-query). +- For information about how to get started optimizing your workflow, see [Optimize your workflow](get-started-sql#optimize-your-workflow). +- For information about how to get started cleaning up resources and data, see [Clean up resources](./get-started-sql#clean-up). +- For information on how to export your data, see [Export data](/Guides/getting-started/get-started-sql.html#export-data). + +## [](#next-steps)Next steps + +To continue learning about Firebolt’s architecture, capabilities, using Firebolt after your trial period, and setting up your organization, see [Resources beyond getting started](/Guides/getting-started/get-started-next.html). 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_next.md b/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_next.md new file mode 100644 index 0000000..aad1d86 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_next.md @@ -0,0 +1,28 @@ +# [](#resources-beyond-getting-started)Resources beyond getting started + +Now that you have successfully created your first engine and database, run your first query, created indexes, copied data into Firebolt and exported data out, you can continue exploring Firebolt’s capabilities. + +## [](#register-through-the-aws-marketplace)Register through the AWS Marketplace + +If you have exhausted your initial $200 credit, you can continue to use Firebolt after registering through the [AWS Marketplace](https://aws.amazon.com/marketplace). You must set up an account for billing in order to continue using Firebolt’s engines to run queries. + +**To register** + +1. On the [Firebolt Workspace page](https://go.firebolt.io/), select the **Configure**(![AggIndex](../../assets/images/configure-icon.png)) icon from the left navigation pane. +2. Under **Organization settings**, select **Billing**. +3. Click **Connect to AWS Marketplace** to take you to the Firebolt page on AWS Marketplace. +4. On the AWS Marketplace page, click the **View Purchase Options** in the top right hand corner of the screen. +5. Click **Setup Your Account**. + +Your account should now be associated with AWS Marketplace. + +## [](#learn-more-about-firebolt)Learn more about Firebolt + +- Learn about Firebolt’s unique [architecture](/Overview/architecture-overview.html). +- Learn more about creating tables and [managing your data](/Overview/data-management.html) in order to let Firebolt provide the fastest query times. +- Learn about the [engines](/Overview/engine-fundamentals.html) that Firebolt uses to process queries and how to select the right size. 
+- Learn how to [load](/Guides/loading-data/loading-data.html) different kinds of data. +- Learn more about [querying data](/Guides/query-data/). +- Learn more about using [indexes](/Overview/indexes/using-indexes.html) to optimize your query times. +- Learn how to [set up your organization](/Guides/managing-your-organization/) to use Firebolt. +- Learn how to [integrate Firebolt](/Guides/integrations/integrations.html) with third party tools and applications. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_sql.md b/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_sql.md new file mode 100644 index 0000000..da70cb4 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_getting_started_get_started_sql.md @@ -0,0 +1,373 @@ +# [](#get-started-using-sql)Get started using SQL + +You can also use SQL to create a database and engine, and load data. If you use the **Develop Space** inside the **Firebolt Workspace**, you can customize your workflow to handle more unique workflows than with the **Load data** wizard, including loading data in TSV, Avro, JSON Lines, or ORC formats. + +The following sections will guide you through a simple workflow to register, create a database and engine, load and query data, learn how to optimize your workflow, and clean up resources as shown in the following diagram: +![A simple workflow using SQL includes registering, creating a database and engine, loading data, querying data, optimizing your workflow, cleaning up, and optionally exporting data.](../../assets/images/get_started_sql_workflow.png) + +## [](#register-with-firebolt)Register with Firebolt + +![The first step in the workflow is to register with Firebolt.](../../assets/images/get_started_sql_register.png) +To get started using Firebolt, begin by registering using the following steps: + +1. [Register](https://go.firebolt.io/signup) with Firebolt. 
Fill in your email, name, choose a password, and select ‘Get Started’. +2. Firebolt will send a confirmation to the address that you provided. To complete your registration, select **Verify** in the email to take you to Firebolt’s [login page](https://go.firebolt.io/login). +3. Type in your email and password and select **Log In**. + +New accounts receive credits ($200) to get started exploring Firebolt’s capabilities. These credits must be used within 30 days of account creation. + +Firebolt’s billing is based on engine runtime, measured in seconds. We also pass through AWS S3 storage costs at the rate of $23 per TB per month. The amount that you spend is dependent primarily on which engines you use and how long those engines are running. + +You can view your total cost in FBU up to the latest second and in $USD up to the latest day. For more information, see the following **Create a Database** section. For more information about costs, see [Data Warehouse Pricing](https://www.firebolt.io/pricing). If you need to buy additional credits, connect Firebolt with your AWS Marketplace account. For more information about AWS Marketplace, see the following section: [Registering through AWS Marketplace section](/Guides/getting-started/get-started-next.html#register-through-the-aws-marketplace). + +## [](#create-a-database)Create a Database + +![After registering, create a database.](../../assets/images/get_started_sql_database.png) +Firebolt decouples storage and compute resources so that multiple engines can run computations on the same database. You can also configure different engine sizes for different workloads. These workloads can run in parallel or separately. Because storage is decoupled from compute, you must first create both a database and an engine before you can run your first query. + +Firebolt’s structure is organized as follows: + +- A database holds the elements that you need to run queries such as tables, views and information schema. 
+- An [engine](/Overview/engine-fundamentals.html) provides the compute resources for ingesting data and running queries. For more information on using Firebolt engines and how to select the correct size for your workload, see [Operate engines](/Guides/operate-engines/operate-engines.html). + +If you used the **Load data** wizard, Firebolt has already created a database for you, and you can skip creating a database. + +The following instructions show you how to create a database and then an engine. Note that you can also create the engine first. + +1. In the left navigation pane, select the **+** to the right **Databases**. +2. Select **Create new database**. +3. Enter the name for your database in the **Database Name** field. For this example, use “tutorial\_database” as your database name. In Firebolt, the names of engines and databases are **case-sensitive**. If you are using uppercase characters in their names, enclose their name inside double quotes (“) when you refer to them in SQL. + +Firebolt creates a new database with the following two default schemas: + +- **Public** - A namespace where you can create and manage your database objects including tables, engines and queries. The default schema includes **tables**, **external tables**, and **views**. +- **Information\_schema** - A standardized set of read-only views that provide metadata about database objects including tables, engines, cost information, and queries. + +You can find these schema by selecting your database under **Databases** in the left navigation pane. Next to the name of your database, select the drop-down arrow to expand and view the schemas and their contents. You can view your total cost in FBU up to the latest second and in $USD up to the latest day in **Information\_schema**. 
+ +If you’re using the **Develop Space**, expand **Information\_schema**, and then **Views** to show the following: + +- **engine\_metering\_history** - contains information about billing cost in FBU up to the latest second in **consumed\_fbu**. +- **engine\_billing** - contains information about billing cost in US dollars up to the latest day in **billed\_cost**. + +To see values for the previous costs, select the **More options** icon (![More options icon](../../assets/images/more_options_icon.png)) next to either **consumed\_fbu** or **billed\_cost**, Then select **Preview data**. You can also run a query in the script tab as shown in the following code example: + +``` +SELECT * +FROM information_schema.engine_metering_history +``` + +## [](#create-an-engine)Create an Engine + +![After creating a database, create an engine.](../../assets/images/get_started_sql_engine.png) +To process a query, you must use an engine. You can either create an engine based on the following recommendations, or use the system engine. You can only use the system engine to run metadata-related queries, but it is always running, so you don’t have to wait for it to start. You can use the system engine to process data in any database. If you create your own engine, there is a small start up time associated with it. + +Firebolt recommends the following initial engine configurations based on where you are in your exploration of Firebolt’s capabilities. An FBU stands for a Firebolt Unit, and is equivalent to 35 US cents. + +Each FBU is related to the amount of time as follows: + +Task Expected Usage Ingest initial data 4-16 FBU Run test queries 8-32 FBU Find optimal query performance 32-240 FBU Find optimal test integrations 32-240 FBU + +Each engine node can cache data locally to improve performance. + +Small and medium engines are available for use right away. If you want to use a large or extra-large engine, reach out to support@firebolt.io. 
The default engine configuration uses a small node, which is sufficient for this tutorial. To learn more about how to select the correct engine size for your workload, see [Sizing Engines](/Guides/operate-engines/sizing-engines.html). + +By default, when you login to **Firebolt’s Workspace** for the first time, Firebolt creates a tab in the **Develop Space** called **Script 1**. The following apply: + +- The database that **Script 1** will run using is located directly below the tab name. If you want to change the database, select another database from the drop-down list. +- An engine must be running to process the script in a selected tab. The name and status of the engine that **Script 1** uses for computation is located to the right of the current selected database. To change either the engine or the status, select the drop-down arrow next to the engine name. You can select a new engine and change its status from **Stopped** to **Running** by selecting **Start engine**. If you select **Run** at the bottom of the workspace, the selected engine starts automatically. Select **Stop engine** to change the status to **Stopped**. Firebolt automatically stops your engine if it is inactive for 20 minutes. + +Because an engine is a dedicated compute node that nobody else can use, you are charged for each second that your engine is **Running**, even if it’s not processing a query. + +If you used the **Load data** wizard, Firebolt has already created an engine for you, and you can skip the following step. + +1. Select the **(+)** icon next to **Databases**. +2. Select **Create new engine** from the drop-down list. +3. Enter the name of your engine in the **New engine name** text box. For this example, enter “tutorial\_engine” as your engine name. + +## [](#load-data)Load Data + +![After creating an engine, you can load your data.](../../assets/images/get_started_sql_load.png) + +After creating an engine, you can load your data. 
This tutorial uses Firebolt’s publicly available Firebolt’s sample dataset, from the fictional [“Ultra Fast Gaming Inc.”](https://help.firebolt.io/t/ultra-fast-gaming-firebolt-sample-dataset/250) company. This dataset does not require access credentials. If your personal dataset requires access credentials, you will need to provide them. For examples of how to provide access credentials and more complex loading workflows, see [Loading data](/Guides/loading-data/loading-data.html). For more information about AWS access credentials, see [Creating Access key and Secret ID](/Guides/loading-data/creating-access-keys-aws.html) + +If you used the **Load data** wizard, skip ahead to the following **Run query** section. + +Use [COPY FROM](/sql_reference/commands/data-management/copy-from.html) in the **Develop Space** to copy data directly from a source into a Firebolt managed table. + +1. Enter the following into the **Script 1** tab to load data using the following steps: + + ``` + COPY INTO tutorial FROM 's3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' WITH HEADER=TRUE; + ``` + + For examples of more complex loading workflows, see [Load data](/Guides/loading-data/loading-data.html). +2. Select **Run**. +3. In the left navigation pane under the **Tutorial\_Database**, **Tables** now contains the **tutorial** table. +4. Expand the drop down menu next to **Columns** to view the name and data format of each column. +5. Select the **More options** icon (![More options icon](../../assets/images/more_options_icon.png)) next to the data type of each column name to open a pop-up that allows you to insert the name of the column into your SQL script. You can also select **Preview data**. +6. To view the contents of the **tutorial** table, run a SELECT query as shown in the following code example. To run this in a new tab, select the (**+**) icon next to the **Script 1** tab. + + ``` + SELECT + * + FROM + tutorial + ``` +7. Select **Run**. 
The bottom of your workspace includes information about your processing job in the following tabs: + + - The **Results** tab at the bottom of your **Develop Space** shows the contents returned by your query. After running the previous SELECT statement, the **Results** tab should display column names and values for the data in the tutorial. + + + + - Select the filter icon (![Filter icon](../../assets/images/filter-icon.png)) to change which columns are shown. + - Select the **More options** icon (![More options icon](../../assets/images/more_options_icon.png)) to export the contents of the **Results** tab to a JSON or CSV file. + - The Statistics tab shows information about running your query including how long it took to run and its status. After running the previous SELECT statement, the **Statistics** tab shows the status of the statement, its STATUS as having succeeded or failed, how long it took to run the query, the number of rows processed, and the amount of data scanned. + - Select the **More options** icon (![More options icon](../../assets/images/more_options_icon.png)) to export the contents of the **Statistics** tab to a JSON or CSV file. + - The **Query Profile** tab contains metrics for each operator used in your query and a **Query id**. Select an operation to view its metrics. These metrics include the following: + + - The output cardinality - the number of rows that each operator produced. + - The thread time - the sum of the wall clock time that threads spent to run the selected operation across all nodes. + - The CPU time - the sum of the time that threads that ran the operator were scheduled on a CPU core. + - The output types - the data types of the result of the operator. + +You can use these metrics to analyze and measure the efficiency and performance of your query. 
For example, if the CPU time is much smaller than thread time, the input-output (IO) latency may be high or the engine that you are using may be running multiple queries at the same time. For more information, see [Example with ANALYZE](/sql_reference/commands/queries/explain.html). + +- The **Engine monitoring** tab shows monitoring information including the percent CPU, memory, disk use and cache read. Information is shown from the last 5 minutes by default. Select a different time interval from the drop-down menu next to **Last 5 minutes**. You can also select the **Refresh** icon next to the drop-down menu to update the graphical information. +- The **Query history** tab shows detailed information associated with each query, listed by its **Query id**. This information includes the query status, start time, number of rows and bytes scanned during the load, user and account information. You can choose the following options at the top of the bottom panel: + + - Select the **Refresh** icon to update the query history and ID. + - Select the filter icon (![Filter data icon](../../assets/images/filter-icon.png)) to remove or add columns to display. + - Select the **More options** icon (![More options icon](../../assets/images/more_options_icon.png)) to export the contents of the **Query history** tab to a JSON or CSV file. + +For more information about Firebolt’s **Develop Space**, see [Using the develop workspace](/Guides/query-data/using-the-develop-workspace.html). + +## [](#run-query)Run Query + +![After loading your data, you can run a query.](../../assets/images/get_started_sql_query.png) + +To run a query on your data, do the following: + +1. Select the (**+**) icon next to the **Script 2** tab to open a new tab. +2. Enter the following simple query, which fetches a list of databases associated with your account: + + ``` + SHOW CATALOGS; + ``` +3. Select **Run** to process the query. 
Firebolt uses the engine listed to the right of your database to run your query, and displays its status as **Running** or **Stopped**. You can select a different engine from the dropdown menu next to the engine (![Engine icon](../../assets/images/engine-icon.png)) icon. + + If your engine is **Stopped**, Firebolt may prompt you to start your engine. Select **Start Engine**. Engine startup typically requires a few moments to complete, as Firebolt prepares your environment for data analysis. + +For more information about Firebolt’s **Develop Space**, see [Use the Develop Space](/Guides/query-data/using-the-develop-workspace.html). + +## [](#optimize-your-workflow)Optimize your workflow + +![After running a baseline query, you can optimize your workflow for better performance.](../../assets/images/get_started_sql_optimize.png) + +Firebolt uses a number of optimization strategies to reduce query times. Over small datasets like those specified in this guide, the uplift may not be noticeable. However, these strategies can **dramatically improve** query performance for larger datasets. The following sections discuss how to use [primary indexes](#primary-indexes) and [aggregating indexes](#aggregating-indexes) to do the following: + +- Reduce the amount of data that the query scans. +- Pre-calculate values that are used repeatedly during computations. + +### [](#primary-indexes)Primary Indexes + +One of Firebolt’s key optimization strategies is to select a primary index for columns that are used frequently in `WHERE`, `JOIN`, `GROUP_BY`, and clauses used for sorting. In Firebolt, a primary index is a type of **sparse index**. Thus, selecting the best primary index can reduce query run times significantly by reducing the data set that the query searches over. Selecting primary indexes also allows Firebolt to manage updates, deletions and insertions to tables and provide optimal query performance. + +If you have a composite primary index, the order that the columns are listed is important. 
Specify the column that has a large number of unique values, or high cardinality, first, followed by columns with lower cardinality. A sort order with the previous characteristics allows Firebolt to prune, or eliminate irrelevant data, so that it doesn’t have to scan it in query processing. Pruning significantly enhances query performance. + +You can create a primary index **only** when you create a table. If you want to change the primary index, you must create a new table. The following example shows how to use [CREATE TABLE](/sql_reference/commands/data-definition/create-fact-dimension-table.html) to create a new `levels` table, define the schema, and set two primary indexes: + +``` +CREATE TABLE IF NOT EXISTS levels ( + "LevelID" INT, + "Name" TEXT, + "GameID" INT, + "LevelType" TEXT, + "MaxPoints" INT, + "PointsPerLap" DOUBLE, + "SceneDetails" TEXT +) +PRIMARY INDEX "LevelID", "Name"; +``` + +In the previous code example, the primary index contains two values. The first value, `LevelID`, is required in order to create a primary index. The second value, `Name`, and any following values are optional. Firebolt will use all listed primary indexes to optimize query scans. If Name has lower cardinality than `LevelID`, then Firebolt can optimize these indexes to eliminate scanning over irrelevant data. For more information about primary indexes and sort order, see [Primary index](/Overview/indexes/primary-index.html). + +To read data into the `levels` table, enter the following into a new script tab: + +``` +COPY INTO levels +FROM 's3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' +WITH TYPE = CSV +HEADER = TRUE; +``` + +### [](#aggregating-indexes)Aggregating Indexes + +Another key optimization strategy includes pre-calculating aggregate values for columns that are frequently used in functions that combine data such as `COUNT`, `SUM`, `MAX`, `MIN`, `AVG`, `JOIN`, and `GROUP BY`. 
Rather than computing aggregate values each time they are used in a calculation, the results are accessed from storage, which helps run queries quickly and saves compute resources. + +An aggregating index combines columns into a statistical result. You can calculate an aggregate index on an entire table, or more efficiently, calculate them over a subset of table columns. You can also use your knowledge of which dimensions and aggregate functions are used most often for your use case to predefine what table dimensions and which aggregate functions to use. + +Once you create aggregate indexes, Firebolt maintains them automatically for you. If you load new data into your table or alter it, your aggregate indexes are automatically updated. You can also have multiple aggregate indexes for a single table. When you query a table with multiple aggregate indexes, Firebolt will automatically select the best index to use to optimize performance. + +From the **tutorial** table that you created in the previous step, assume you want to run a query to look at the AVG(NumberOfLaps), grouped by LevelType. The following example code shows you how to create an aggregating index **levels\_agg\_idx** on the **LevelType** column to pre-calculate the average number of laps for each level. + +``` +CREATE AGGREGATING INDEX + levels_agg_idx +ON tutorial ( + "LevelType" + , AVG("NumberOfLaps") + ); +``` + +After you run the script, the `levels_agg_idx` aggregate index is listed in the left navigation pane under **Indexes** in the **tutorial** table. Any queries that run over the tutorial table that use an average of the **NumberOfLaps** column grouped by **LevelType** will now use the `levels_agg_idx` index instead of reading the entire table to calculate it. + +For more information, see [Aggregating index](/Overview/indexes/aggregating-index.html). 
+ +### [](#warm-data-and-cache-eviction)Warm data and cache eviction + +Another key optimization strategy is to read warm data, or data accessed from cache, rather than reading in “cold” data from an Amazon S3 bucket. Querying cold data can be significantly slower than querying warm data, particularly for large datasets that contain millions of rows or more. If you reach about 80% of your available cache, the least recently used data will be moved out of cache into an Amazon S3 bucket. + +#### [](#warm-data)Warm data + +When data is warm, Firebolt transfers data from remote storage in Amazon (S3) to a local cache. Data is automatically warmed when you access it during a query, and stored in a solid state drive (SSD) cache. However, when you query data to warm it, you use an engine, and incur [engine consumption](/Overview/engine-consumption.html) costs. Therefore, you should use filters to warm only the data that you need to access frequently in your queries. + +The following guidance applies: + +- If you need access to all the data in a table, use `CHECKSUM` to warm the entire table as follows: + + ``` + SELECT CHECKSUM(*) FROM levels; + ``` +- If you only need a few columns that meet a certain criteria, filter them before warming the data as shown in the following code example: + + ``` + SELECT CHECKSUM("Name", "MaxPoints") FROM levels WHERE "MaxPoints" BETWEEN 100 AND 250; + ``` +- If you have a large dataset, you can divide the data into smaller segments, and execute the queries in parallel, as shown in the following code example: + + ``` + SELECT CHECKSUM("Name", "MaxPoints") FROM levels WHERE "MaxPoints" BETWEEN 0 AND 100; + SELECT CHECKSUM("Name", "MaxPoints") FROM levels WHERE "MaxPoints" BETWEEN 101 AND 200; + SELECT CHECKSUM("Name", "MaxPoints") FROM levels WHERE "MaxPoints" > 200; + ``` + +#### [](#cache-eviction)Cache eviction + +When your cache usage exceeds approximately 80% of its capacity, Firebolt automatically evicts some of the data from the 
cache. If you query this data later, Firebolt reloads it into the cache before processing the query. The total available cache size depends on your engine’s size and family. Larger engine sizes provide more cache space, and the storage-optimized family offers more cache than the compute-optimized family. Small and medium sized engines are available for use right away. If you want to use a large or extra-large engine, reach out to [support@firebolt.io](mailto:support@firebolt.io). + +You can check the size of your cache using the following example code: + +``` +SHOW CACHE; +``` + +The previous code example shows your cache usage in GB per total cache available. + +When data is loaded into Firebolt, it is stored in units of data storage called tablets. A tablet contains a subset of a table’s rows and columns. If you reach your cache’s 80% capacity, the entire tablet that contains the least recently used data, is evicted. + +The following code example shows how to view information about the tablets that are used to store your table including the number of uncompressed and compressed bytes on disk: + +``` +SELECT * FROM information_schema.engine_tablets where table_name = 'levels'; +``` + +## [](#clean-up)Clean up + +![To avoid incurring costs, clean up data and resources when you're finished with your workflow.](../../assets/images/get_started_sql_clean.png) + +After you’ve completed the steps in this guide, avoid incurring costs associated with the getting started exercises by doing the following: + +- Stop any running engines. +- Remove data from storage. + +### [](#stop-any-running-engines)Stop any running engines + +Firebolt shows you the status of your current engine next to the engines icon (![Engine icon](../../assets/images/engine-icon.png)) under your script tab as either **Stopped** or **Running**. 
To shut down your engine, select your engine from the drop-down list next to the name of the engine, and then select one of the following: + +- Stop engine - Allow all of the currently running queries to finish running and then shut down the engine. Selecting this option will allow the engine to run for as long as it takes to complete all queries running on the selected engine. +- Terminate all queries and stop - Stop the engine and stop running any queries. Selecting this option stops the engine in about 20-30 seconds. + +### [](#remove-data-from-storage)Remove data from storage + +To remove a table and all of its data, enter [DROP TABLE](/sql_reference/commands/data-definition/drop-table.html) into a script tab, as shown in the following code example: + +``` +DROP TABLE levels +``` + +To remove a database and all of its associated data, do the following in the Firebolt **Develop Space**: + +- Select the database from the left navigation bar. +- Select the **More options** (![More options icon](../../assets/images/more_options_icon.png)) icon. +- Select **Delete database**. Deleting your database will permanently remove your database from Firebolt. You cannot undo this action. Select **Delete**. + +## [](#export-data)Export data + +![The following section shows how to optionally export data after cleaning up.](../../assets/images/get_started_sql_export.png) + +If you want to save your data outside of Firebolt, you can use [COPY TO](/sql_reference/commands/data-management/copy-to.html) to export data to an external table. This section shows how to set the minimal AWS permissions and use `COPY TO` to export data to an [AWS S3 bucket](https://docs.aws.amazon.com/AmazonS3/latest/userguide/Welcome.html). You may have to reach out to your administrator to obtain or change AWS permissions. + +### [](#set-permissions-to-write-to-an-aws-bucket)Set permissions to write to an AWS bucket + +Firebolt must have the following permissions to write to an AWS S3 bucket: + +1. 
AWS access key credentials. The credentials must be associated with a user with permissions to write objects to the bucket. Specify access key credentials using the following syntax: + +``` + CREDENTIALS = (AWS_ACCESS_KEY_ID = '<aws_access_key_id>' AWS_SECRET_ACCESS_KEY = '<aws_secret_access_key>') +``` + +In the previous credentials example, `<aws_access_key_id>` is the AWS access key ID associated with an AWS user or an IAM role. An access key ID has the following form: `AKIAIOSFODNN7EXAMPLE`. The value `<aws_secret_access_key>` is the AWS secret access key. A secret access key has the following form: `wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`. + +1. An AWS IAM policy statement attached to a user role. Firebolt requires the following minimum permissions in the IAM policy: + + ``` + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:Get*", + "s3:List*", + "s3:PutObject", + "s3:DeleteObject" + ], + "Resource": [ + "arn:aws:s3:::my_s3_bucket", + "arn:aws:s3:::my_s3_bucket/*" + ] + } + ] + } + ``` + + For more information about AWS access keys and roles, see [Creating Access Key and Secret ID in AWS](/Guides/loading-data/creating-access-keys-aws.html). + +### [](#export-to-an-aws-bucket)Export to an AWS bucket + +Use [COPY TO](/sql_reference/commands/data-management/copy-to.html) to select all the columns from a table and export them to an AWS S3 bucket as shown in the following code example: + +``` +COPY (SELECT * FROM test_table) + TO 's3://my-bucket/path/to/data' + CREDENTIALS = + (AWS_ROLE_ARN= 'arn:aws:iam::123456789012:role/my-firebolt-role'); +``` + +In the previous code example, the role ARN ([Amazon Resource Name](https://docs.aws.amazon.com/IAM/latest/UserGuide/reference-arns.html)) identifies the AWS IAM role that specifies the access for users or services. An ARN follows the following structure: arn:aws:iam::account-id:role/role-name. Because TYPE is omitted from `COPY TO`, the file or files will be written in the default CSV format. 
Because `COMPRESSION` is also omitted, the output data is compressed using GZIP (\*.csv.gz) format. + +Firebolt assigns a query ID, that has the following example format `16B903C4206098FD`, to the query at runtime. If the size of the compressed output exceeds the default of `16` MB, Firebolt writes multiple GZIP files. In the following example, the size of the output is `40` MB, so Firebolt writes `4` files. + +``` +s3://my_bucket/my_fb_queries/ +16B903C4206098FD_0.csv.gz +16B903C4206098FD_1.csv.gz +16B903C4206098FD_2.csv.gz +16B903C4206098FD_3.csv.gz +``` + +## [](#next-steps)Next steps + +To continue learning about Firebolt’s architecture, capabilities, using Firebolt after your trial period, and setting up your organization, see [Resources beyond getting started](/Guides/getting-started/get-started-next.html). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_airbyte.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_airbyte.md new file mode 100644 index 0000000..5af6bdc --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_airbyte.md @@ -0,0 +1,94 @@ +# [](#overview)Overview + +![Airbyte](/assets/images/airbyte.png) + +Airbyte is an open-source data integration platform that significantly simplifies the ETL (Extract, Transform, Load) process, making it easier for users to manage and migrate their data across various sources. By providing a user-friendly interface and robust functionality, Airbyte enables seamless data movement and transformation, catering to a wide range of data integration needs. One of the key features of Airbyte is its extensive range of connectors, which allow it to integrate with numerous data sources and destinations. + +Using Airbyte’s Firebolt connector, users can efficiently and effortlessly load large amounts of data to and from Firebolt. This capability extends to integration with a wide array of data sources, thanks to Airbyte’s extensive library of connectors. 
Whether your data resides in cloud storage, on-premises databases, SaaS applications, or other data warehouses, Airbyte facilitates smooth and reliable data transfer between these sources and Firebolt. + +# [](#quickstart)Quickstart + +There are several [ways](https://docs.airbyte.com/deploying-airbyte) to deploy Airbyte. In this tutorial we will use the easiest way to start prototyping by using a [Docker Compose](https://docs.docker.com/compose/) deployment locally. + +If you already have an Airbyte deployment, skip to the [configuration section](#step-2-configure-firebolt-connection-via-ui). + +#### [](#prerequisites)Prerequisites + +1. **Docker**: Ensure you have Docker installed. You can download it from [here](https://www.docker.com/products/docker-desktop). +2. **Firebolt Account**: You need an active Firebolt account. Sign up [here](https://www.firebolt.io/) if you don’t have one. +3. **Firebolt Database and Table**: Make sure you have a Firebolt database and table with data ready for querying. +4. **Firebolt Service Account**: Create a [service account](/Guides/managing-your-organization/service-accounts.html) in Firebolt and note its ID and secret. + +#### [](#step-1-deploy-airbyte-locally-with-docker)Step 1: Deploy Airbyte Locally with Docker + +1. Create a new directory for your Airbyte setup: + + ``` + git clone --depth=1 https://github.com/airbytehq/airbyte.git + ``` +2. Switch to the Airbyte directory: + + ``` + cd airbyte + ``` +3. Start Airbyte by running the following command in the terminal: + + ``` + ./run-ab-platform.sh + ``` +4. Open your browser and navigate to `http://localhost:8000` to access the Airbyte UI. +5. You will be asked for a username and password. By default the username is `airbyte` and the password is `password`. Before you deploy Airbyte in production make sure to change the password. + +#### [](#step-2-configure-firebolt-connection-via-ui)Step 2: Configure Firebolt Connection via UI + +1. 
In the Airbyte UI, click on the **“Connections”** tab and select **“Create your first connection”**. +2. Click on **“New Destination”** and select **“Firebolt”** as the destination type. +3. Enter your Firebolt connection details: + + - Client ID: Your service account id. + - Client Secret: Your service account secret. + - Database: Your database name. + - Account: Your Firebolt [account](/Guides/managing-your-organization/managing-accounts.html). + - Engine: Firebolt engine which will run the ingestion. + - Host (Optional): For non-standard use cases. Should be left blank. +4. Select replication strategy. SQL is easier to set up but S3 is more performant on production loads. See the [Airbyte doc](https://docs.airbyte.com/integrations/destinations/firebolt) for more information. +5. Save. + + ![Connection parameters](../../assets/images/airbyte-connection-parameters.png) + +#### [](#step-3-create-a-connection-in-airbyte)Step 3: Create a Connection in Airbyte + +1. In the Airbyte UI, click on the **“Connections”** tab and select **“Create your first connection”** (**“New Connection”** if you already have a connection defined). +2. Choose a source from which you want to extract data. We’ll be using **Faker** to generate some sample data. +3. Leave fields as is and click **“Set up source”**. +4. Next in the destination screen select the Firebolt destination you configured earlier. +5. Select the streams you want to replicate and sync mode (Full refresh or Incremental). To save time select only “products” stream. +6. Finally, specify the frequency of your data replication, or choose manual if you want to trigger the job in the UI or via an API call. +7. Click **“Set up connection”** to start syncing data from your source to Firebolt! + + ![Streams](/assets/images/airbyte-sample-streams.png) + +#### [](#step-4-monitor-and-manage-data-syncs)Step 4: Monitor and Manage Data Syncs + +1. 
Use the Airbyte UI to monitor your data syncs and ensure that data is being transferred accurately and efficiently. +2. Adjust sync settings and transformations as needed to optimize your ETL process. You can leverage dbt to apply further transformations to the replicated data. + + ![Sync monitoring](/assets/images/airbyte-sync-monitoring.png) + +### [](#output-schema)Output schema + +The Firebolt Destination connector is a V1 connector, meaning it works with raw data. Refer to Airbyte’s [Destination V2 document](https://docs.airbyte.com/using-airbyte/core-concepts/typing-deduping#what-is-destinations-v2) to learn about the differences. Each stream is written into its own [Fact table](/Overview/indexes/using-indexes.html#firebolt-managed-tables) in Firebolt, containing three columns: + +- `_airbyte_ab_id`: a UUID assigned by Airbyte to each processed event. The column type is TEXT. +- `_airbyte_emitted_at`: a TIMESTAMP indicating when the event was pulled from the source. +- `_airbyte_data`: a JSON blob representing event data, stored as TEXT, but can be parsed using [JSON functions](/sql_reference/functions-reference/JSON/). + +### [](#further-reading)Further Reading + +After setting up Airbyte with Firebolt, explore these resources to leverage additional features and enhance your data integration capabilities: + +1. Learn how to use [Firebolt Source](https://docs.airbyte.com/integrations/sources/firebolt). +2. Ensure you’re following [security guidelines](https://docs.airbyte.com/operating-airbyte/security). +3. Explore other [deployment options](https://docs.airbyte.com/deploying-airbyte). +4. Configure your [connections](https://docs.airbyte.com/cloud/managing-airbyte-cloud/configuring-connections). 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_airflow.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_airflow.md new file mode 100644 index 0000000..e17c7e3 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_airflow.md @@ -0,0 +1,413 @@ +# [](#connecting-to-airflow)Connecting to Airflow + +[Apache Airflow](https://airflow.apache.org/) is a data orchestration tool that allows you to programmatically create, schedule, and monitor workflows. You can connect a Firebolt database into your data pipeline using the Airflow provider package for Firebolt. For example, you can schedule automatic incremental data ingestion into Firebolt. + +This guide explains how to install the [Airflow provider package](https://pypi.org/project/airflow-provider-firebolt/) for Firebolt, set up a connection to Firebolt resources using the Airflow user interface (UI), and create an example Directed Acyclic Graph (DAG) for common Firebolt tasks. The source code for the Airflow provider package for Firebolt is available in the [airflow-provider-firebolt](https://github.com/firebolt-db/airflow-provider-firebolt) repository on GitHub. + +## [](#prerequisites)Prerequisites + +Make sure that you have: + +- A Firebolt account. [Create a new account](/Guides/managing-your-organization/managing-accounts.html#create-a-new-account). +- A Firebolt database and engine. +- [Python](https://www.python.org/downloads/) version 3.8 or later. +- An installation of Airflow. See the [Airflow installation guide](https://airflow.apache.org/docs/apache-airflow/stable/installation/index.html). + +## [](#install-the-airflow-provider-package-for-firebolt)Install the Airflow provider package for Firebolt + +You need to install the Airflow provider package for Firebolt. This package enables Firebolt as a **Connection type** in the Airflow UI. + +1. Install the package. 
+ + Run the following command to install the package: + + ``` + pip install airflow-provider-firebolt + ``` +2. Upgrade to the latest version. + + Run the latest version of the provider package. [Release history](https://pypi.org/project/airflow-provider-firebolt/#history) is available on PyPI. + + Use the following command to upgrade: + + ``` + pip install airflow-provider-firebolt --upgrade + ``` + + Restart Airflow after upgrading to apply the new changes. +3. Install a specific version. + + If a specific version is required, replace `1.0.0` with the desired version: + + ``` + pip install airflow-provider-firebolt==1.0.0 + ``` +4. Install the provider for AWS Managed Airflow (MWAA). + + Ensure you are using version 2 of AWS Managed Airflow (MWAA) when working with the Firebolt Airflow provider. Add `airflow-provider-firebolt` to the `requirements.txt` file following the instructions in the [MWAA Documentation.](https://docs.aws.amazon.com/mwaa/latest/userguide/working-dags-dependencies.html) + +## [](#connect-airflow-to-firebolt)Connect Airflow to Firebolt + +Create a connection object in the Airflow UI to integrate Firebolt with Airflow. + +### [](#steps-to-configure-a-connection)Steps to configure a connection + +1. Open the Airflow UI and log in. +2. Select the **Admin** menu. +3. Choose **Connections**. +4. Select the **+** button to add a new connection. +5. Choose Firebolt from the **Connection Type** list +6. Provide the details in the following table. These connection parameters correspond to built-in Airflow variables. + + Parameter Description Example value Connection id The name of the connection for the UI. `My_Firebolt_Connection` Description Information about the connection. `Connection to Firebolt database MyDatabase using engine MyFireboltDatabase_general_purpose.` Database The name of the Firebolt database to connect to. `MyFireboltDatabase` Engine The name of the engine to run queries `MyFireboltEngine` Client ID The ID of your service account. 
`XyZ83JSuhsua82hs` Client Secret The [secret](/Guides/loading-data/creating-access-keys-aws.html) for your service account authentication. `yy7h&993))29&%j` Account The name of your account. `developer` Extra The additional properties that you may need to set (optional). `{"property1": "value1", "property2": "value2"}` + + Client ID and secret credentials can be obtained by registering a [service account](/Guides/managing-your-organization/service-accounts.html). +7. Choose **Test** to verify the connection. +8. Once the test succeeds, select **Save**. + +## [](#create-a-dag-for-data-processing-with-firebolt)Create a DAG for data processing with Firebolt + +A DAG file in Airflow is a Python script that defines tasks and their execution order for a data workflow. The following example is an example DAG for performing the following tasks: + +- Start a Firebolt [engine](/Overview/engine-fundamentals.html). +- Create an [external table](/Guides/loading-data/working-with-external-tables.html) linked to an Amazon S3 data source. +- Create a fact table for ingested data. For more information, see [Firebolt-managed tables](/Overview/indexes/using-indexes.html#firebolt-managed-tables). +- Insert data into the fact table. +- Stop the Firebolt engine. This task is not required if your engine has `AUTO_STOP` configured + +### [](#dag-script-example)DAG script example + +The following DAG script creates a DAG named `firebolt_provider_trip_data`. It uses an Airflow connection to Firebolt named `my_firebolt_connection`. For the contents of the SQL scripts that the DAG runs, see the following [SQL script examples](#sql-script-examples). You can run this example with your own database and engine by updating the connector values in Airfow, setting the `FIREBOLT_CONN_ID` to match your connector, and creating the necessary custom variables in Airflow. 
+ +``` +import time +import airflow +from airflow.models import DAG, Variable +from firebolt_provider.operators.firebolt \ + import FireboltOperator, FireboltStartEngineOperator, FireboltStopEngineOperator + +# Define function to get Firebolt connection parameters +def connection_params(conn_opp, field): + connector = FireboltOperator( + firebolt_conn_id=conn_opp, sql="", task_id="CONNECT") + conn_parameters = connector.get_db_hook()._get_conn_params() + return getattr(conn_parameters, field) + +# Set up the Firebolt connection ID +firebolt_conn_id = 'firebolt' +firebolt_engine_name = connection_params(firebolt_conn_id, 'engine_name') +tmpl_search_path = Variable.get("firebolt_sql_path") +default_args = { + 'owner': 'airflow', + 'start_date': airflow.utils.dates.days_ago(1) +} + +# Function to open query files saved locally. +def get_query(query_file): + return open(query_file, "r").read() + +# Define a variable based on an Airflow DAG class. +# For class parameters, see https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/models/dag/index.html#airflow.models.dag.DAG. +with DAG('firebolt_provider_startstop_trip_data', + default_args=default_args, + template_searchpath=tmpl_search_path, + schedule_interval=None, + catchup=False, + tags=["firebolt"]) as dag: + + # Define DAG tasks and task sequence. + # Where necessary, read local sql files using the Airflow variable. 
+ task_start_engine = FireboltStartEngineOperator( + dag=dag, + task_id="START_ENGINE", + firebolt_conn_id=firebolt_conn_id, + engine_name=firebolt_engine_name) + + task_trip_data__external_table = FireboltOperator( + dag=dag, + task_id="task_trip_data__external_table", + sql=get_query(f'{tmpl_search_path}/trip_data__create_external_table.sql'), + firebolt_conn_id=firebolt_conn_id + ) + + task_trip_data__create_table = FireboltOperator( + dag=dag, + task_id="task_trip_data__create_table", + sql=get_query(f'{tmpl_search_path}/trip_data__create_table.sql'), + firebolt_conn_id=firebolt_conn_id + ) + task_trip_data__create_table.post_execute = lambda **x: time.sleep(10) + + task_trip_data__process_data = FireboltOperator( + dag=dag, + task_id="task_trip_data__process_data", + sql=get_query(f'{tmpl_search_path}/trip_data__process.sql'), + firebolt_conn_id=firebolt_conn_id + ) + + task_stop_engine = FireboltStopEngineOperator( + dag=dag, + task_id="STOP_ENGINE", + firebolt_conn_id=firebolt_conn_id, + engine_name=firebolt_engine_name) + + (task_start_engine >> task_trip_data__external_table >> + task_trip_data__create_table >> task_trip_data__process_data >> task_stop_engine) +``` + +This DAG showcases various Firebolt tasks as an example and is not intended to represent a typical real-world workflow or pipeline. + +### [](#define-airflow-variables)Define Airflow variables + +Airflow variables store-key value pairs that DAGs can use during execution. You can create and manage variables through the Airflow user interface (UI) or JSON documents. For detailed instructions, check out Airflow’s [Variables](https://airflow.apache.org/docs/apache-airflow/stable/concepts/variables.html) and [Managing Variables](https://airflow.apache.org/docs/apache-airflow/stable/howto/variable.html) documentation pages. 
+ +**Example variable for SQL files** +The DAG example uses the custom variable `firebolt_sql_path` to define the directory within your Airflow home directory where SQL files are stored. The DAG reads these files to execute tasks in Firebolt. + +- **Key**: `firebolt_sql_path` +- **Value**: Path to the directory containing SQL scripts. For example, `~/airflow/sql_store`. + +**Using the variable in the DAG** +A python function in the DAG reads the SQL scripts stored in the directory defined by `firebolt_sql_path`. This allows the DAG to dynamically execute the SQL files as tasks in Firebolt. + +The following example demonstrates how the variable is accessed in the DAG script: + +``` +tmpl_search_path = Variable.get("firebolt_sql_path") + +def get_query(query_file): + with open(query_file, "r") as file: + return file.read() +``` + +### [](#sql-script-examples)SQL script examples + +Save the following SQL scripts to your `tmpl_search_path`. + +#### [](#trip_data__create_external_tablesql)trip\_data\_\_create\_external\_table.sql + +This example creates the `ex_trip_data` fact table to connect to a public Amazon S3 data store. + +``` +CREATE EXTERNAL TABLE IF NOT EXISTS ex_trip_data( + vendorid INTEGER, + lpep_pickup_datetime TIMESTAMP, + lpep_dropoff_datetime TIMESTAMP, + passenger_count INTEGER, + trip_distance REAL, + ratecodeid INTEGER, + store_and_fwd_flag TEXT, + pu_location_id INTEGER, + do_location_id INTEGER, + payment_type INTEGER, + fare_amount REAL, + extra REAL, + mta_tax REAL, + tip_amount REAL, + tolls_amount REAL, + improvement_surcharge REAL, + total_amount REAL, + congestion_surcharge REAL +) +url = 's3://firebolt-publishing-public/samples/taxi/' +object_pattern = '*yellow*2020*.csv' +type = (CSV SKIP_HEADER_ROWS = true); +``` + +#### [](#trip_data__create_tablesql)trip\_data\_\_create\_table.sql + +This example creates the `my_taxi_trip_data` fact table, to receive ingested data. 
+ +``` +DROP TABLE IF EXISTS my_taxi_trip_data; +CREATE FACT TABLE IF NOT EXISTS my_taxi_trip_data( + vendorid INTEGER, + lpep_pickup_datetime TIMESTAMP, + lpep_dropoff_datetime TIMESTAMP, + passenger_count INTEGER, + trip_distance REAL, + ratecodeid INTEGER, + store_and_fwd_flag TEXT, + pu_location_id INTEGER, + do_location_id INTEGER, + payment_type INTEGER, + fare_amount REAL, + extra REAL, + mta_tax REAL, + tip_amount REAL, + tolls_amount REAL, + improvement_surcharge REAL, + total_amount REAL, + congestion_surcharge REAL, + SOURCE_FILE_NAME TEXT, + SOURCE_FILE_TIMESTAMP TIMESTAMP +) PRIMARY INDEX vendorid; +``` + +#### [](#trip_data__processsql)trip\_data\_\_process.sql + +An `INSERT INTO` operation ingests data into the `my_taxi_trip_data` fact table using the `ex_trip_data` external table. This example uses the external table metadata column, `$source_file_timestamp`, to retrieve records exclusively from the latest file. + +``` +INSERT INTO my_taxi_trip_data +SELECT + vendorid, + lpep_pickup_datetime, + lpep_dropoff_datetime, + passenger_count, + trip_distance, + ratecodeid, + store_and_fwd_flag, + pu_location_id, + do_location_id, + payment_type, + fare_amount, + extra, + mta_tax, + tip_amount, + tolls_amount, + improvement_surcharge, + total_amount, + congestion_surcharge, + $source_file_name, + $source_file_timestamp +FROM ex_trip_data +WHERE coalesce($source_file_timestamp > (SELECT MAX(source_file_timestamp) FROM my_taxi_trip_data), true); +``` + +## [](#example-working-with-query-results)Example: Working with query results + +The `FireboltOperator` is designed to execute SQL queries but does not return query results. To retrieve query results, use the `FireboltHook` class. The following example demonstrates how to use `FireboltHook` to execute a query and log the row count in the `my_taxi_trip_data` table. 
+ +### [](#python-code-example-retrieving-query-results)Python code example: Retrieving query results + +``` +import logging + +import airflow +from airflow import DAG +from airflow.operators.python import PythonOperator +from firebolt_provider.hooks.firebolt import FireboltHook +from airflow.providers.common.sql.hooks.sql import fetch_one_handler + +# Set up the Firebolt connection ID +firebolt_conn_id = 'firebolt' +default_args = { + 'owner': 'airflow', + 'start_date': airflow.utils.dates.days_ago(1) +} + + +# Function to notify the team about the data +def notify(message: str): + logging.info(message) + + +# Function to fetch data from Firebolt and notify the team +def fetch_firebolt_data(): + hook = FireboltHook(firebolt_conn_id=firebolt_conn_id) + results = hook.run( + "SELECT count(*) FROM my_taxi_trip_data", + handler=fetch_one_handler + ) + count = results[0] + notify("Amount of data in Firebolt: " + str(count)) + + +with DAG( + 'return_result_dag', + default_args=default_args, + schedule_interval=None, # Run manually + catchup=False +) as dag: + # Define a Python operator to fetch data from Firebolt and notify the team + monitor_firebolt_data = PythonOperator( + task_id='monitor_firebolt_data', + python_callable=fetch_firebolt_data + ) + + monitor_firebolt_data +``` + +## [](#example-controlling-query-execution-timeout)Example: Controlling query execution timeout + +The Firebolt provider includes parameters to control query execution time and behavior when a timeout occurs: + +- `query_timeout`: Sets the maximum duration (in seconds) that a query can run. +- `fail_on_query_timeout`: If `True`, a timeout raises a `QueryTimeoutError`. If `False`, the task terminates quietly, and the task proceeds without raising an error. + +### [](#python-code-example-using-timeout-settings)Python code example: Using timeout settings + +In this example, the `FireboltOperator` task stops execution after one second and proceeds without error.
The `PythonOperator` task fetches data from Firebolt with a timeout of 0.5 seconds and raises an error if the query times out. + +``` +import airflow +from airflow.models import DAG, Variable +from airflow.operators.python import PythonOperator +from firebolt_provider.hooks.firebolt import FireboltHook +from airflow.providers.common.sql.hooks.sql import fetch_one_handler +from firebolt_provider.operators.firebolt import FireboltOperator + +# Set up the Firebolt connection ID +firebolt_conn_id = 'firebolt' +default_args = { + 'owner': 'airflow', + 'start_date': airflow.utils.dates.days_ago(1) +} +tmpl_search_path = Variable.get("firebolt_sql_path") + + +def get_query(query_file): + return open(query_file, "r").read() + +# Function to fetch data with a timeout +def fetch_with_timeout(): + hook = FireboltHook( + firebolt_conn_id=firebolt_conn_id, + query_timeout=0.5, + fail_on_query_timeout=True, + ) + results = hook.run( + "SELECT count(*) FROM my_taxi_trip_data", + handler=fetch_one_handler, + ) + print(f"Results: {results}") + +# Define the DAG +with DAG( + 'timeout_dag', + default_args=default_args, + schedule_interval=None, # Run manually + catchup=False +) as dag: + + # Firebolt operator with a timeout + + firebolt_operator_with_timeout = FireboltOperator( + dag=dag, + task_id="insert_with_timeout", + sql=get_query(f'{tmpl_search_path}/trip_data__process.sql'), + firebolt_conn_id=firebolt_conn_id, + query_timeout=1, + # Task will not fail if query times out, and will proceed to the next task + fail_on_query_timeout=False, + ) + + # Python operator to fetch data with a timeout + operator_with_hook_timeout = PythonOperator( + dag=dag, + task_id='select_with_hook_timeout', + python_callable=fetch_with_timeout, + ) + + firebolt_operator_with_timeout >> operator_with_hook_timeout +``` + +## [](#additional-resources)Additional resources + +For more information about connecting to Airflow, refer to the following resources: + +- [Managing Connections in 
Airflow](https://airflow.apache.org/docs/apache-airflow/stable/howto/connection.html) +- [Firebolt Airflow provider on Pypi](https://pypi.org/project/airflow-provider-firebolt/) +- [DAGs](https://airflow.apache.org/docs/apache-airflow/stable/core-concepts/dags.html) +- [airflow.models.dag](https://airflow.apache.org/docs/apache-airflow/stable/_api/airflow/models/dag/index.html#module-airflow.models.dag) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_apache_superset.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_apache_superset.md new file mode 100644 index 0000000..1866423 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_apache_superset.md @@ -0,0 +1,105 @@ +# [](#apache-superset)Apache Superset + +[Apache Superset](https://superset.apache.org) is an open-source data exploration and visualization platform that empowers users to create interactive, shareable dashboards and charts for analyzing and presenting data. It supports a wide range of data sources and provides an intuitive, web-based interface for data exploration, slicing, and dicing, with features like dynamic filtering, pivot tables, and drag-and-drop functionality. Superset also offers a rich set of visualization options and can be extended through custom plugins, making it a versatile tool for data analysts and business users to gain insights from their data and collaborate effectively. + +With its exceptional speed and scalability, Firebolt allows users to handle vast amounts of data with minimal query latency, ensuring that Superset dashboards and visualizations load quickly, even when dealing with massive datasets. This integration between Firebolt and Superset creates a powerful combination for data professionals, offering them a streamlined and efficient workflow for extracting maximum value from their data. 
+ +Firebolt is also supported in [Preset](/Guides/integrations/connecting-to-preset.html), a fully managed cloud Superset solution. + +# [](#prerequisites)Prerequisites + +Superset can be installed in several ways, including using a pre-built [Docker container](https://superset.apache.org/docs/installation/installing-superset-using-docker-compose), building it from [source](https://superset.apache.org/docs/installation/installing-superset-from-scratch) or deploying via [Kubernetes Helm chart](https://superset.apache.org/docs/installation/running-on-kubernetes). + +The easiest way to get started is to run Superset via Docker. + +You will need: + +- [Docker](https://www.docker.com/) and [Docker Compose](https://docs.docker.com/compose/). +- [VirtualBox](https://www.virtualbox.org/) (Windows only). +- [Git](https://git-scm.com/). + +# [](#quickstart)Quickstart + +Follow this guide to set up Superset and get your first chart ready. + +### [](#setup-superset)Setup Superset + +1. Clone Superset’s GitHub [repository](https://github.com/apache/superset) + + ``` + git clone https://github.com/apache/superset.git + ``` +2. Change directory to the root of the newly cloned repository and add the Firebolt driver + + ``` + cd superset + touch ./docker/requirements-local.txt + echo "firebolt-sqlalchemy" >> ./docker/requirements-local.txt + ``` +3. Run Superset via Docker Compose + + ``` + docker compose -f docker-compose-non-dev.yml up + ``` +4. (Optional) Verify the Firebolt driver is present in the Superset container + + ``` + docker exec -it superset_app bash + pip freeze | grep firebolt + ``` + + You should see `firebolt-sqlalchemy` in the output. + +Once your Superset is booted up you should be able to access it at http://localhost:8088/ + +> **Note:** For more installation details, refer to [Adding New Database Drivers in Docker](https://superset.apache.org/docs/databases/docker-add-drivers) in the Superset documentation.
+ +### [](#setup-firebolt-connection)Setup Firebolt connection + +After the initial setup in the Superset User Interface, head to `Settings -> Database connections` in the top right corner. + +![Database Connections](../../assets/images/superset-settings.png) + +On the next screen, press the `+ Database` button and select Firebolt from the dropdown. If you don’t see Firebolt in the list, please refer to the [Setup Superset](#setup-superset) section for instructions on how to install the Firebolt driver and verify that the driver is present. + +![Connect database](../../assets/images/superset-connect-a-database.png) + +The connection expects a SQLAlchemy connection string of the form: + +``` +firebolt://{client_id}:{client_secret}@{database}/{engine_name}?account_name={account_name} +``` + +To authenticate, use a service account ID and secret. A service account is identified by a `client_id` and a `client_secret`. Learn how to generate an ID and secret [here](/Guides/managing-your-organization/service-accounts.html). + +Account name must be provided, you can learn about accounts in [Manage accounts](/Guides/managing-your-organization/managing-accounts.html) section. + +![Credentials](../../assets/images/superset-firebolt-uri.png) + +Click the Test Connection button to confirm things work end to end. If the connection looks good, save the configuration by clicking the Connect button in the bottom right corner of the modal window. Now you’re ready to start using Superset! + +### [](#build-your-first-chart)Build your first chart + +> **Note:** This section assumes you have followed Firebolt [tutorial](/Guides/getting-started/) and loaded a sample data set into your database. + +Now that you’ve configured Firebolt as a data source, you can select specific tables (Datasets) that you want to see in Superset. + +Go to Data -> Datasets and select `+ Dataset`.
There you can select your sample table by specifying Firebolt as your Database, your schema and the table name you chose. + +![Dataset](/assets/images/superset-dataset-config.png) + +Press “Create Dataset and Create Chart”. On the next screen you can select your desired chart type. For this tutorial we will go with a simple Bar Chart. + +![Chart type](/assets/images/superset-char-type-select.png) + +In the next screen you can drag and drop your table columns into Metrics and Dimensions, specify filters or sorting orders. We will plot max play time per level grouping it by level type and sorting the x-axis in ascending order. + +![Chart](/assets/images/superset-sample-chart.png) + +Your first chart is ready! You can now save it, add more data to it or change its type. You can also start building a dashboard with different charts telling a story. Learn more about this functionality and more by following the links below. + +# [](#further-reading)Further reading + +- [Creating your first Dashboard](https://superset.apache.org/docs/creating-charts-dashboards/creating-your-first-dashboard). +- [Exploring data](https://superset.apache.org/docs/creating-charts-dashboards/exploring-data). +- [Preset](https://preset.io/) - managed Superset. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_paradime.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_paradime.md new file mode 100644 index 0000000..2162cde --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_paradime.md @@ -0,0 +1,82 @@ +# [](#integrate-paradime-with-firebolt)Integrate Paradime with Firebolt + +![Paradime logo](../../assets/images/paradime_logo.png) + +[Paradime](https://www.paradime.io/) is a unified platform for data science and analytics that streamlines workflows for data teams. 
It offers a collaborative workspace where data scientists and analysts can explore, analyze, and visualize data across multiple tools and environments. Paradime integrates with tools including Jupyter notebooks, SQL editors, and Tableau. You can use the Paradime connector to link the Paradime platform directly to Firebolt’s cloud data warehouse. This connection allows you to run SQL queries, visualize results, and collaborate with team members all within the Paradime workspace. + +This guide shows you how to connect Paradime to Firebolt using the Paradime user interface (UI). You must have a Firebolt account, a Firebolt service account, access to a Firebolt database, and an account with Paradime. These instructions build on the steps in Paradime’s [Getting Started with your Paradime Workspace](https://docs.paradime.io/app-help/guides/paradime-101/getting-started-with-your-paradime-workspace) guide, providing Firebolt-specific configuration details. + +Topics: + +- [Prerequisites](#prerequisites) +- [Create a Paradime workspace](#create-a-paradime-workspace). +- (Optional) [Create a schedule](#create-a-schedule-optional). + +## [](#prerequisites)Prerequisites + +Before you can connect Paradime to Firebolt, you must have the following: + +1. **Firebolt Account**: Ensure that you have access to an active Firebolt account. If you don’t have access, you can [sign up for an account](https://www.firebolt.io/sign-up). For more information about how to register with Firebolt, see [Get started with Firebolt](/Guides/getting-started/). +2. **Service Account**: You must have access to an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html), which facilitates programmatic access to Firebolt. +3. **Firebolt Database**: You must have access to a Firebolt database. If you don’t have access, you can [create a database](/Guides/getting-started/get-started-sql.html#create-a-database). +4. 
**Paradime Account**: You must have access to an active Paradime account. If you don’t have access, you can [sign up](https://app.paradime.io) for one. + +## [](#create-a-paradime-workspace)Create a Paradime workspace + +Create a Paradime workspace to connect to Firebolt as follows: + +01. In the Paradime UI, navigate to your account profile in the upper-right corner of the page. +02. Select **Profile Settings**. +03. In the **Workspaces** window, select the **New Workspace** button. +04. Enter a descriptive name for your workspace in the text box under **Name**. +05. Select **Create Workspace**. +06. Select **Continue**. +07. Select the most recent dbt-core version from the drop-down list. +08. Select **Continue**. +09. Select a dbt repository. You can either use an existing data build tool ([dbt](https://www.getdbt.com/blog/what-exactly-is-dbt)) repository or fork Firebolt’s sample [Jaffle Shop](https://github.com/firebolt-db/jaffle_shop_firebolt) repository from GitHub. Paradime supports the following providers: Azure DevOps, Bitbucket, GitHub, and GitLab. +10. Select **Next**. +11. Enter the SSH URI for your repository in the text box under **Repository URI**. Copy the key that appears under the **Deploy Key**. +12. Add the new deploy key to your dbt repository and allow write access. The following are resources for providers supported by Paradime: + + - [Add the deploy key](https://docs.github.com/en/authentication/connecting-to-github-with-ssh/managing-deploy-keys#set-up-deploy-keys) in **github**. + - [Add a deployment key](https://www.atlassian.com/blog/bitbucket/deployment-keys) in **Bitbucket**. + - Use [deploy keys](https://docs.gitlab.com/ee/user/project/deploy_keys/) in **gitlab**. + - [Use SSH key authentication](https://learn.microsoft.com/en-us/azure/devops/repos/git/use-ssh-keys-to-authenticate?view=azure-devops) to connect with **Azure DevOps**. +13. Select **Continue**. +14. If your repository connected successfully, select **Continue**. +15. 
Select **Firebolt** from the choices under **Warehouse connection**. +16. Under **Connection Settings**, enter the following: + + 1. **Profile Name** – The name of a [connection profile](https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles) that is defined in `dbt_project.yaml` by a workspace administrator, and contains configurations including credentials to connect to a data warehouse. For more information, see Paradime’s [Setting up your profile](https://docs.getdbt.com/docs/core/connect-data-platform/connection-profiles#setting-up-your-profile) guide. + 2. **Target** – Specify the [target variable](https://docs.getdbt.com/reference/dbt-jinja-functions/target) that contains information about your data warehouse connection including its name, schema, and type. + 3. **Host Name** – Enter `api.app.firebolt.io`. +17. Under **Development Credentials**, enter the following: + + 1. **Client Id** – Enter your Firebolt [service account ID](/Guides/managing-your-organization/service-accounts.html#get-a-service-account-id). Do not enter your Firebolt login email. + 2. **Client Secret** – Enter your Firebolt [service account secret](/Guides/managing-your-organization/service-accounts.html#generate-a-secret). Do not enter your Firebolt password. + 3. **Account Name** – Enter your Firebolt [account name](/Guides/managing-your-organization/managing-accounts.html). + 4. **Engine Name** – Enter the name of the engine where you want to run your queries. + 5. **Database Name** – Specify the Firebolt database name. + 6. Select **Test Connection** to connect to Firebolt and authenticate. + 7. Select **Next**. + +For more information about the previous connection settings, see Paradime’s documentation to [add a development connection](https://docs.paradime.io/app-help/documentation/settings/connections/development-environment/firebolt). 
+ +## [](#create-a-schedule-optional)Create a schedule (Optional) + +Paradime offers a scheduling feature using a [Bolt user interface](https://docs.paradime.io/app-help/documentation/bolt) to automatically run dbt commands on a specified interval or event. You can use Bolt to run a dbt job in a production environment, in a test environment prior to merging changes to production, or in an environment that runs jobs only on changed models. + +To create a new schedule: + +1. Login to your [Paradime account](https://app.paradime.io/?target=main-app). +2. Select **Bolt** from the left navigation bar. +3. Select **+ New Schedule**. +4. Select a pre-configured template from a list of popular Bolt templates or create a new schedule using a blank template. For information about how to configure settings in a Paradime schedule, see [Schedule Fields](https://docs.paradime.io/app-help/guides/paradime-101/running-dbt-in-production-with-bolt/creating-bolt-schedules#ui-based-schedule-fields). +5. Select **Publish**. +6. To view the new schedule, select **Bolt** from the left navigation pane. + +# [](#additional-resources)Additional resources + +- Learn about the [Paradime integrated development Environment](https://docs.paradime.io/app-help/guides/paradime-101/getting-started-with-the-paradime-ide). +- Learn to use the [Bolt scheduler](https://docs.paradime.io/app-help/bolt-scheduler/running-dbt-tm-in-production/creating-bolt-schedules) to run your dbt jobs. +- Learn how to [manage your Bolt schedule](https://docs.paradime.io/app-help/documentation/bolt/managing-schedules). 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_preset.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_preset.md new file mode 100644 index 0000000..a36abbe --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_to_preset.md @@ -0,0 +1,66 @@ +# [](#preset)Preset + +[Preset](https://preset.io/) is a cloud-hosted data exploration and visualization platform built on top of the popular open-source project, [Apache Superset](https://superset.apache.org/). This fully managed service makes it easy to run Superset at scale with enterprise-ready security, reliability, and governance. + +Boasting exceptional speed and scalability, Firebolt enables users to adeptly manage substantial data volumes with minimal query latency. The integration with Preset establishes a strong partnership for data professionals, presenting them with a streamlined and efficient workflow. This collaboration ensures prompt loading of Preset dashboards and visualizations, even when confronted with extensive datasets, thereby facilitating the extraction of maximum value from their data. + +# [](#prerequisites)Prerequisites + +Preset is a managed service so most of the deployment requirements are handled by them. + +You will only need: + +- To [register](https://manage.app.preset.io/starter-registration/) a Preset account. +- To have a Firebolt account and service account [credentials](/Guides/managing-your-organization/service-accounts.html). +- [Load data](/Guides/loading-data/loading-data.html) you want to visualise. + +Make sure that your [service account’s network policy](https://docs.firebolt.io/Guides/managing-your-organization/service-accounts.html#edit-your-service-account-using-the-ui) allows connections from [Preset IPs](https://docs.preset.io/docs/connecting-your-data). 
+ +# [](#quickstart)Quickstart + +### [](#create-a-workspace)Create a workspace + +A workspace is an organizational unit, accessible by team members, that is created for a specific purpose. You can read Preset’s [guidance](https://docs.preset.io/docs/about-workspaces) on workspaces to learn more. + +1. To Create a Workspace, navigate to the empty card and select + Workspace. + + ![Create Workspace](../../assets/images/preset-create-workspace-click.png) +2. Define Workspace name and settings + + ![Add new Workspace](../../assets/images/preset-add-new-workspace.png) +3. Save the workspace and enter it by clicking the card. + +### [](#setup-firebolt-connection)Setup Firebolt connection + +After the initial setup in the Preset User Interface, head to `Settings -> Database connections` in the top right corner. + +![Database Connections](../../assets/images/preset-settings.png) + +On the next screen, press the `+ Database` button and select Firebolt from the dropdown. + +![Connect database](../../assets/images/superset-connect-a-database.png) + +The connection expects a SQLAlchemy connection string of the form: + +``` +firebolt://{client_id}:{client_secret}@{database}/{engine_name}?account_name={account_name} +``` + +To authenticate, use a service account ID and secret. A service account is identified by a `client_id` and a `client_secret`. Learn how to generate an ID and secret [here](/Guides/managing-your-organization/service-accounts.html). + +Account name must be provided, you can learn about accounts in [Manage accounts](/Guides/managing-your-organization/managing-accounts.html) section. + +![Credentials](../../assets/images/superset-firebolt-uri.png) + +Click the Test Connection button to confirm things work end to end. If the connection looks good, save the configuration by clicking the Connect button in the bottom right corner of the modal window. Now you’re ready to start using Preset!
+ +### [](#build-your-first-chart)Build your first chart + +To build a chart you can follow our guide in the [Superset section](/Guides/integrations/connecting-to-apache-superset.html#build-your-first-chart), as the Preset works identically. + +# [](#further-reading)Further reading + +- [Creating a chart](https://docs.preset.io/docs/creating-a-chart) walkthrough. +- [Creating a Dashboard](https://docs.preset.io/docs/creating-a-dashboard). +- [Collaboration features of Preset](https://docs.preset.io/docs/sharing-and-collaboration). +- [Storytelling in charts](https://docs.preset.io/docs/storytelling-with-charts-and-dashboards-mini-guide). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_with_dbt.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_with_dbt.md new file mode 100644 index 0000000..422e2a0 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_connecting_with_dbt.md @@ -0,0 +1,213 @@ +# [](#overview)Overview + +![DBT](/assets/images/dbt-logo.png) + +[DBT](https://www.getdbt.com), or Data Build Tool, is a framework designed for managing and executing data transformations within modern data warehousing architectures. It facilitates the development and deployment of SQL-based transformations in a version-controlled environment, enabling collaboration and ensuring reproducibility of data pipelines. DBT streamlines the process of transforming raw data into analytics-ready datasets, accelerating the delivery of insights. + +The Firebolt adapter for dbt brings together dbt’s state-of-the-art development tools and Firebolt’s next-generation analytics performance. On top of dbt’s core features, the adapter offers native support for all of Firebolt’s index types and has been specifically enhanced to support ingestion from S3 using Firebolt’s external tables mechanics. 
+ +# [](#prerequisites)Prerequisites + +There are two ways to deploy DBT: self-hosted [DBT Core](https://docs.getdbt.com/docs/introduction#dbt-core) and managed [DBT Cloud](https://docs.getdbt.com/docs/cloud/about-cloud/dbt-cloud-features). + +This guide shows how to set up a local installation of [DBT Core](https://docs.getdbt.com/docs/introduction#dbt-core). This guide uses Python’s `pip` package manager, but you can use the following ways to install DBT: [Homebrew](https://docs.getdbt.com/docs/core/homebrew-install), [Docker](https://docs.getdbt.com/docs/core/docker-install), and from [source](https://docs.getdbt.com/docs/core/source-install). + +You will need the following: + +- A GitHub account. +- Python 3.8+. + +# [](#quickstart)Quickstart + +This guide shows you how to set up DBT with Firebolt and run your first DBT [model](https://docs.getdbt.com/docs/build/models). + +### [](#setup-dbt-core)Setup DBT Core + +1. Create a new Python [virtual environment](https://docs.python.org/3/library/venv.html), as shown in the following script example: + + ``` + python3 -m venv dbt-env + ``` +2. Activate your `venv`, as shown in the following script example: + + ``` + source dbt-env/bin/activate + ``` +3. Install Firebolt’s [adapter](https://github.com/firebolt-db/dbt-firebolt) for DBT, as shown in the following script example: + + ``` + python -m pip install dbt-firebolt + ``` +4. (Optional) Check that both dbt packages are installed: + + ``` + python -m pip list | grep dbt + ``` + + This command should return `dbt-core` and `dbt-firebolt` and their respective versions. + +### [](#setup-connection-to-firebolt)Setup connection to Firebolt + +DBT uses a `profiles.yml` file to store the connection information. This file generally lives outside of your dbt project to avoid checking in sensitive information in to version control. + +The usual place to create this file on Mac and Linux is `~/.dbt/profiles.yml`. + +1. 
Open `~/.dbt/profiles.yml` with your preferred text editor. +2. Paste the following sample configuration: + + ``` + jaffle-shop: + target: dev + outputs: + dev: + type: firebolt + client_id: "<client_id>" + client_secret: "<client_secret>" + database: "<database>" + engine_name: "<engine_name>" + account_name: "<account_name>" + schema: "<schema>" + ``` +3. Replace the placeholders with your account’s information. + + `<client_id>` and `<client_secret>` are the key and secret of your service account. If you don’t have one, follow the steps in the [Manage service accounts](/Guides/managing-your-organization/service-accounts.html) page to learn how to set one up. + + `<database>` and `<engine_name>` are the Firebolt database and engine that you want your queries to run on. + + `<account_name>` is a Firebolt account that you’re connected to. Learn more [here](/Guides/managing-your-organization/managing-accounts.html). + + `<schema>` is a prefix prepended to your table names. Since Firebolt does not support custom schemas, this prefix serves as a [workaround](https://docs.getdbt.com/docs/core/connect-data-platform/firebolt-setup#supporting-concurrent-development) to prevent table name conflicts during concurrent development. + +### [](#setup-jaffle-shop-a-sample-dbt-project)Setup Jaffle Shop, a sample dbt project + +`jaffle_shop` is a fictional ecommerce store. This dbt project transforms raw data from an app database into a customers and orders model ready for analytics. [This version](https://github.com/firebolt-db/jaffle_shop_firebolt) is designed to showcase Firebolt’s integration with DBT. + +1. Clone the `jaffle-shop-firebolt` repository and change to the newly created directory, as follows: + + ``` + git clone https://github.com/firebolt-db/jaffle_shop_firebolt.git + cd jaffle_shop_firebolt + ``` +2. Ensure your profile is set up correctly: + + ``` + dbt debug + ``` + + If you’re seeing an error here, check that your `profiles.yml` is [set up correctly](#setup-connection-to-firebolt), is in the right directory on your system, and that the [engine](/Guides/operate-engines/operate-engines.html) is running.
Also check that you’re still in `dbt-env` virtual Python environment that we’ve [setup earlier](#setup-dbt-core) and that both packages are present. +3. Install dependent packages: + + ``` + dbt deps + ``` +4. Run the external table model. If your database is not in `us-east-1` AWS region then refer to the [Readme](https://github.com/firebolt-db/jaffle_shop_firebolt) on how to copy the files. + + ``` + dbt run-operation stage_external_sources + ``` +5. Load sample CSV in your database: + + ``` + dbt seed + ``` +6. Run the models: + + ``` + dbt run + ``` + +You should now see the `customers` and `orders` tables in your database, created using dbt models. From here you can explore more of DBT’s capabilities, including incremental models, documentation generation, and more, by following the official guides in the section below. + +# [](#limitations)Limitations + +Not every feature of DBT is supported in Firebolt. You can find an up-to-date list of features in the [adapter documentation](https://github.com/firebolt-db/dbt-firebolt?tab=readme-ov-file#feature-support). + +# [](#external-table-loading-strategy)External table loading strategy + +In the previous Jaffle Shop example we used a public Amazon S3 bucket to load data. If your bucket contains sensitive data, you will want to restrict access. Follow our [guide](/Guides/loading-data/creating-access-keys-aws.html) to set up AWS authentication using an ID and secret key. 
+ +In your `dbt_project.yml`, you can specify the credentials for your external table in fields `aws_key_id` and `aws_secret_key`, as shown in the following code example: + +``` +sources: + - name: firebolt_external + schema: "" + loader: S3 + + tables: + - name: + external: + url: 's3:///' + object_pattern: '' + type: '' + credentials: + aws_key_id: + aws_secret_key: + object_pattern: '' + compression: '' + partitions: + - name: + data_type: + regex: '' + columns: + - name: + data_type: +``` + +To use external tables, you must define a table as external in your `dbt_project.yml` file. Every external table must contain the fields: `url`, `type`, and `object_pattern`. The Firebolt external table [specification](/sql_reference/commands/data-definition/create-external-table.html) requires fewer fields than those specified in the dbt documentation. + +# [](#copy-loading-strategy)“Copy” loading strategy + +You can also use [COPY FROM](/sql_reference/commands/data-management/copy-from.html) to load data from Amazon S3 into Firebolt. It has a simple syntax and doesn’t require an exact match with your source data. `COPY_FROM` does not create an intermediate table and writes your data straight into Firebolt so you can start working with it right away. + +The copy syntax in dbt closely adheres to the [syntax](/sql_reference/commands/data-management/copy-from.html#syntax) in Firebolt’s `COPY_FROM`. + +To use `COPY FROM` instead of creating an external table, set `strategy: copy` in your external source definition. For backwards compatibility, if no strategy is specified, the external table strategy is used by default. 
+ +``` + +sources: + - name: s3 + tables: + - name: + external: + strategy: copy + url: 's3:///' + credentials: + aws_key_id: + aws_secret_key: + options: + object_pattern: '' + type: 'CSV' + auto_create: true +``` + +You can also include the following options: + +``` +options: + object_pattern: '' + type: 'CSV' + auto_create: true + allow_column_mismatch: false + max_errors_per_file: 10 + csv_options: + header: true + delimiter: ',' + quote: DOUBLE_QUOTE + escape: '\' + null_string: '\\N' + empty_field_as_null: true + skip_blank_lines: true + date_format: 'YYYY-MM-DD' + timestamp_format: 'YYYY-MM-DD HH24:MI:SS' +``` + +In the previous code example, `csv_options` are indented. For detailed descriptions of these options and their allowed values, refer to the [parameter specification](/sql_reference/commands/data-management/copy-from.html#parameters). + +# [](#further-reading)Further reading + +- [Configuring Firebolt-specific features](https://docs.getdbt.com/reference/resource-configs/firebolt-configs). +- [Incremental models](https://docs.getdbt.com/docs/build/incremental-models). +- [Data tests](https://docs.getdbt.com/docs/build/data-tests). +- [Documenting your models](https://docs.getdbt.com/docs/collaborate/documentation). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_cube_js.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_cube_js.md new file mode 100644 index 0000000..7ff4d34 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_cube_js.md @@ -0,0 +1,98 @@ +# [](#overview)Overview + +![Cube](/assets/images/cube-js-logo.png) + +Cube.js is an open-source analytical API platform that empowers developers to build custom and scalable analytics solutions. By acting as an intermediary between your data sources and front-end applications, Cube.js simplifies the process of querying large datasets and ensures efficient data management and visualization. 
+ +Integrating Cube.js with Firebolt significantly enhances the data processing capabilities of your analytics stack. Firebolt’s ability to execute complex queries with minimal latency aligns perfectly with Cube.js’s goal of delivering fast and responsive analytics. As a result, users benefit from a seamless and highly performant analytics experience, making it an ideal solution for businesses looking to scale their data operations without compromising on speed or efficiency. + +# [](#quickstart-connecting-cubejs-to-firebolt)Quickstart: Connecting Cube.js to Firebolt + +Follow these steps to quickly connect Cube.js to Firebolt and start building powerful analytics solutions using Docker. For this demo we’ll be using [Cube Core](https://cube.dev/docs/product/getting-started/core). For other deployment options follow the Cube [documentation](https://cube.dev/docs/product/deployment). + +#### [](#prerequisites)Prerequisites + +1. **Docker**: Ensure you have Docker installed. You can download it from [here](https://www.docker.com/products/docker-desktop). +2. **Firebolt Account**: You need an active Firebolt account. Sign up [here](https://www.firebolt.io/) if you don’t have one. +3. **Firebolt Database and Table**: Make sure you have a Firebolt database and table with data ready for querying. Follow our [Getting started tutorial](/Guides/getting-started/) to set up some sample data. +4. **Firebolt Service Account**: Create a [service account](/Guides/managing-your-organization/service-accounts.html) in Firebolt and note its id and secret. + +#### [](#step-1-create-a-cubejs-project-with-docker)Step 1: Create a Cube.js Project with Docker + +1. Create a new directory for your Cube.js project: + + ``` + mkdir cubejs-firebolt + cd cubejs-firebolt + touch docker-compose.yml + ``` +2. 
Create a `docker-compose.yml` file with the following content: + + ``` + version: "2.2" + + services: + cube: + image: cubejs/cube:latest + ports: + - 4000:4000 + - 15432:15432 + environment: + CUBEJS_DEV_MODE: "true" + volumes: + - .:/cube/conf + ``` + +#### [](#step-2-start-cubejs)Step 2: Start Cube.js + +1. Run the Cube.js development server using Docker Compose: + + ``` + docker compose up -d + ``` +2. Open your browser and navigate to `http://localhost:4000`. You should see the Cube.js [playground](https://cube.dev/docs/product/workspace/playground). + +#### [](#step-3-configure-firebolt-connection-via-ui)Step 3: Configure Firebolt Connection via UI + +The Playground has a database connection wizard that loads when Cube is first started up and no .env file is found. After database credentials have been set up, an .env file will automatically be created and populated with credentials. + +1. Select **Firebolt** as the database type. +2. Enter your Firebolt credentials: + + - **Client ID**: Your service account ID + - **Client Secret**: Your service account secret + - **Database**: Your Firebolt database name + - **Account**: Your [account](/Guides/managing-your-organization/managing-accounts.html) name + - **Engine Name**: Your Firebolt engine name +3. Click “Apply” to set up the connection + +#### [](#step-4-generate-schema-using-ui)Step 4: Generate Schema Using UI + +You should see tables available to you from the configured database + +1. Select the `levels` table. +2. After selecting the table, click Generate Data Model and pick either YAML (recommended) or JavaScript format. +3. Click build. + + ![Data Model](/assets/images/cube-data-model-generation.png) + +You can start exploring your data! + +#### [](#step-5-query-data-in-playground)Step 5: Query data in Playground + +Select measures, dimensions and filters to explore your data! + +![Playground](../../assets/images/cube-playground-example.png) + +Congratulations! 
You have successfully connected Cube.js to Firebolt and can now start building high-performance analytics solutions. For more detailed configuration and advanced features, refer to the [Cube.js documentation](https://cube.dev/docs) and [Firebolt documentation](https://docs.firebolt.io/). + +### [](#further-reading)Further Reading + +After setting up Cube.js with Firebolt, you can explore and leverage several powerful features to enhance your analytics capabilities. Here are some resources to help you get started: + +1. **Cube.js Data Blending**: Understand how to combine data from different sources for more comprehensive analysis. [Cube.js Data Blending Documentation](https://cube.dev/docs/product/data-modeling/concepts/data-blending) +2. **Cube.js Security**: Implement row-level security to ensure your data is accessed appropriately. [Cube.js Security Documentation](https://cube.dev/docs/security) +3. **Cube.js API**: Explore the Cube.js REST API to programmatically access your data and build custom integrations. [Cube.js API Reference](https://cube.dev/docs/rest-api) +4. **Cube.js Dashboard App**: Build and deploy powerful dashboards using Cube.js and your favorite front-end frameworks. [Cube.js Dashboard App Documentation](https://cube.dev/docs/dashboard-app) + +These resources will help you unlock the full potential of Cube.js and create robust, high-performance analytics solutions. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_dbeaver.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_dbeaver.md new file mode 100644 index 0000000..f82189f --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_dbeaver.md @@ -0,0 +1,59 @@ +# [](#integrate-with-dbeaver)Integrate with DBeaver + +![DBeaver logo](../../assets/images/DBeaver-logo.png) + +DBeaver is a free, open-source database administration tool that supports multiple database types. 
It provides a graphical interface for managing databases, running queries, and analyzing data. DBeaver is widely used for database development, troubleshooting, and administration, making it a versatile choice for both developers and database administrators. You can connect DBeaver to Firebolt using the [Firebolt JDBC driver](/Guides/developing-with-firebolt/connecting-with-jdbc.html). + +- [Prerequisites](#prerequisites) +- [Add the Firebolt JDBC Driver in DBeaver](#add-the-firebolt-jdbc-driver-in-dbeaver) +- [Connect to Firebolt in DBeaver](#connect-to-firebolt-in-dbeaver) +- [Query Firebolt in DBeaver](#query-firebolt-in-dbeaver) +- [Additional Resources](#additional-resources) + +## [](#prerequisites)Prerequisites + +You must have the following prerequisites before you can connect your Firebolt account to DBeaver: + +- **Firebolt account** – You need an active Firebolt account. If you do not have one, you can [sign up](https://go.firebolt.io/signup) for one. +- **Firebolt database and engine** – You must have access to a Firebolt database. If you do not have access, you can [create a database](/Guides/getting-started/get-started-sql.html#create-a-database) and then [create an engine](/Guides/getting-started/get-started-sql.html#create-an-engine). +- **Firebolt service account** – You must have an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html) for programmatic access, along with its ID and secret. +- **Sufficient permissions** – Your service account must be [associated](/Guides/managing-your-organization/service-accounts.html#create-a-user) with a user. The user should have [USAGE](/Overview/Security/Role-Based%20Access%20Control/database-permissions/) permission to query your database, and [OPERATE](/Overview/Security/Role-Based%20Access%20Control/engine-permissions.html) permission to start and stop an engine if it is not already started. 
It should also have at least USAGE and SELECT [permissions](/Overview/Security/Role-Based%20Access%20Control/database-permissions/schema-permissions.html) on the schema you are planning to query. +- **DBeaver installed** – You must have downloaded and installed [DBeaver](https://dbeaver.io/download/). + +## [](#add-the-firebolt-jdbc-driver-in-dbeaver)Add the Firebolt JDBC Driver in DBeaver + +To connect to Firebolt, you must add the Firebolt JDBC driver to DBeaver as follows: + +1. Download the [Firebolt JDBC driver](/Guides/developing-with-firebolt/connecting-with-jdbc.html#download-the-jar-file). +2. In the DBeaver user interface (UI), under **Database**, select **Driver Manager**. +3. In **Driver Manager**, select **New** and enter the following parameters: + + - **Driver Name**: `Firebolt` + - **Class Name**: `com.firebolt.FireboltDriver` +4. Select the **Libraries** tab. +5. Select **Add File**, and then select the JDBC driver you downloaded in the first step. +6. Select **Close**. + +## [](#connect-to-firebolt-in-dbeaver)Connect to Firebolt in DBeaver + +To connect to Firebolt, you must configure a new database connection in DBeaver as follows: + +1. In DBeaver, select **Database**, then **New Database Connection**. +2. Enter `Firebolt` in the search box, then select it from the list. +3. Select **Next>**. +4. Enter the connection parameters in the **Main** tab as follows: + + Parameter Description **JDBC URL** Use `jdbc:firebolt:?engine=&account=` replacing `` with your Firebolt [database name](/Overview/indexes/using-indexes.html#databases), `` with your [engine name](/Guides/getting-started/get-started-sql.html#create-an-engine) and `` with your [account name](/Guides/managing-your-organization/managing-accounts.html). **Username** Your Firebolt [service account](/Guides/managing-your-organization/service-accounts.html#get-a-service-account-id) ID. 
**Password** Your Firebolt [service account](/Guides/managing-your-organization/service-accounts.html#generate-a-secret) secret. +5. Select **Test Connection** to verify the connection. Ensure your Firebolt database is running before testing. +6. If the connection is successful, select **Finish**. + +## [](#query-firebolt-in-dbeaver)Query Firebolt in DBeaver + +1. In the database navigator, right-click or open the context menu of your Firebolt connection, select **SQL Editor**, then select **New SQL Script**. +2. Enter SQL queries into the SQL editor to interact with your Firebolt database. + +## [](#additional-resources)Additional Resources + +- Learn more about the [Firebolt JDBC driver](/Guides/developing-with-firebolt/connecting-with-jdbc.html). +- Explore [DBeaver’s documentation](https://dbeaver.io/documentation/) for details on its UI, integrations, tools, and features. +- Discover other tools that [Firebolt integrates](/Guides/integrations/integrations.html) with. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_estuary.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_estuary.md new file mode 100644 index 0000000..18b1551 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_estuary.md @@ -0,0 +1,127 @@ +# [](#integrate-estuary-flow-with-firebolt)Integrate Estuary Flow with Firebolt + +![Estuary logo](../../assets/images/estuary.png) + +Estuary Flow is a real-time data integration platform designed to streamline the movement and transformation of data between diverse sources and destinations. It provides an event-driven architecture and a user-friendly interface for building pipelines with minimal effort. You can use Flow to set up pipelines to load data from various sources, such as cloud storage and databases, into Firebolt’s cloud data warehouse for low-latency analytics. 
+ +This guide shows you how to set up a Flow pipeline that automatically moves data from your Amazon S3 bucket to your Firebolt database using the Estuary Flow user interface (UI). You must have access to an Estuary Flow account, an Amazon S3 bucket, and a Firebolt service account. + +Topics: + +- [Integrate Estuary Flow with Firebolt](#integrate-estuary-flow-with-firebolt) + + - [Prerequisites](#prerequisites) + - [Configure your Estuary Flow source](#configure-your-estuary-flow-source) + - [Configure your Estuary Flow destination](#configure-your-estuary-flow-destination) + - [Monitor your materialization](#monitor-your-materialization) + - [Validate your materialization](#validate-your-materialization) + - [Additional resources](#additional-resources) + +## [](#prerequisites)Prerequisites + +1. **Estuary Flow account** – You must have access to an active Estuary Flow account. If you do not have access, you can [sign up](https://www.estuary.dev) with Estuary. +2. **Amazon S3 bucket** – you must have access to the following: + + - An [AWS Access Key ID and AWS Secret Access Key](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html) for an Amazon S3 bucket. + - The name and path to an [Amazon S3 bucket](https://aws.amazon.com/s3/) that contains your data. +3. **Firebolt service account** – + + - Access to an organization in Firebolt. If you don’t have access, you can [create an organization](/Guides/managing-your-organization/creating-an-organization.html). + - Access to a Firebolt database and engine. If you don’t have access, you can [create a database](/Guides/getting-started/get-started-sql.html#create-a-database) and [create an engine](/Guides/getting-started/get-started-sql.html#create-an-engine). 
+ - Access to a Firebolt service account, which is used for programmatic access, its [service account ID](/Guides/managing-your-organization/service-accounts.html#get-a-service-account-id) and [secret](/Guides/managing-your-organization/service-accounts.html#generate-a-secret-using-the-ui). If you don’t have access, you can [create a service account](/Guides/managing-your-organization/service-accounts.html#create-a-service-account). + +## [](#configure-your-estuary-flow-source)Configure your Estuary Flow source + +To set up an Estuary Flow pipeline that automatically moves data from your Amazon S3 bucket, you must create a capture that defines how and where data should be collected. Create a capture for the Estuary Flow source as follows: + +1. Sign in to your [Estuary Flow Dashboard](https://dashboard.estuary.dev). +2. Select **Sources** from the left navigation pane. +3. In the **Sources** window, select **+ NEW CAPTURE**. +4. From the list of available connectors, navigate to **Amazon S3**, and select **Capture**. +5. Under **Capture Details**, enter a descriptive name for your capture in the text box under **Name**. +6. Under **Endpoint Config**, enter the following: + + 1. **AWS Access Key ID** – The AWS account ID associated with the Amazon S3 bucket containing your data. + 2. **AWS Secret Access Key** – The AWS secret access key associated with the Amazon S3 bucket containing your data. + 3. **AWS Region** – The [AWS region](https://aws.amazon.com/about-aws/global-infrastructure/regions_az/) that contains your Amazon S3 bucket. For example: `us-east-1`. + 4. **Bucket** – The name of your Amazon S3 bucket. For example, `firebolt-publishing-public`. + 5. **Prefix** (Optional) – A folder or key prefix to restrict the data to a specific path within the bucket. An example prefix structure follows: `/help_center_assets/firebolt_sample_dataset/levels.csv`. + 6. 
**Match Keys** (Optional) – Use a filter to include only specific object keys under the prefix, narrowing the capture’s scope. +7. Select the **NEXT** button in the upper-right corner of the page. +8. Test and save your connection as follows: + + 1. Select **TEST** in the upper-right corner of the page. Estuary will run a test for your capture and display **Success** if it completes successfully. + 2. Select **CLOSE** in the bottom-right corner of the page. + 3. Select the **SAVE AND PUBLISH** button in the upper-right corner of the page. Estuary will test, save, and publish your capture and display **Success** if it completes successfully. + 4. Select **CLOSE** in the bottom-right corner of the page. + +## [](#configure-your-estuary-flow-destination)Configure your Estuary Flow destination + +To set up an Estuary Flow pipeline that automatically moves data from your Amazon S3 bucket, you must create a materialization that defines how the data should appear in the destination system, including any schema or transformation logic. Create a materialization for the Estuary Flow destination as follows: + +1. Select **Destinations** from the left navigation pane. +2. Select the **+ NEW MATERIALIZATION** button in the upper-left corner of the page. +3. Navigate to the **Firebolt** connector and select **Materialization**. +4. Under **Materialization Details**, enter a descriptive name for your materialization in the text box under **Name**. +5. Under **Endpoint Config**, enter the following: + + 01. **Client ID** – The service account ID for your Firebolt service account. + 02. **Client Secret** – The secret for your Firebolt service account. + 03. **Account Name** – The name of your service account. + 04. **Database** – The name of the Firebolt database where you want to put your data. For example, `my-database`. + 05. **Engine Name** – The name of the Firebolt engine to run the queries. For example: `my-engine-name`. + 06. 
**S3 Bucket** – The name of the Amazon S3 bucket to store temporary intermediate files related to the operation of the external table. For example, `my-bucket`. + 07. **S3 Prefix** – (Optional) A folder or key prefix to restrict the data to a specific path within the bucket. An example prefix structure follows the format in: `temp_files/`. + 08. **AWS Key ID** – The access key ID for the AWS account linked to the Amazon S3 bucket for temporary file storage. + 09. **AWS Secret Key** – The AWS secret key associated with the Amazon S3 bucket to store temporary files. + 10. **AWS Region** – The [AWS region](https://aws.amazon.com/about-aws/global-infrastructure/regions_az/) of your Amazon S3 bucket. For example: `us-east-1`. +6. Select the **NEXT** button in the upper-right corner of the page. +7. Under **Source Collections**, do the following: + + 1. Select **Source From Capture**. + 2. In the **Captures** window, select the checkbox next to the Amazon S3 source you specified when you configured your Estuary Flow source. + 3. Select the **CONTINUE** button in the bottom-right corner of the page. + 4. Verify that the **Table** name and type in the **CONFIG** tab under **Resource Configuration** are correct, and update if necessary. + 5. (Optional) Choose **Refresh** next to **Field Selection** to preview the fields, their types, and actions that will be written to Firebolt. +8. Test and save your materialization as follows: + + 1. Select the **TEST** button in the upper-right corner of the page. Estuary will run a test for your materialization and display **Success** if it completes successfully. + 2. Select **CLOSE** in the bottom-right corner of the page. + 3. Select the **SAVE AND PUBLISH** button in the upper-right corner of the page. Estuary will test, save, and publish your materialization and display **Success** if it completes successfully. + 4. Select **CLOSE** in the bottom-right corner of the page. 
+ +## [](#monitor-your-materialization)Monitor your materialization + +You can monitor your new data pipeline in Estuary Flow’s dashboard as follows: + +1. Select **Destinations** from the left navigation pane. +2. Select your newly created materialization to view a dashboard with the following tabs: + + 1. **OVERVIEW** – Provides a high-level summary of the materialization that includes throughput over time. + 2. **SPEC** – Displays the configurations and specifications of the materialization that includes schema mapping from the source to destination, the configuration of the destination, and any filters or constrains on the materialized data. + 3. **LOGS** – Provides records of materialization activity including success and failure events, messages, and errors. + +Ensure that your data is being ingested and transferred as expected. + +## [](#validate-your-materialization)Validate your materialization + +You can validate that your data has arrived at Firebolt as follows: + +1. Log in to the [Firebolt Workspace](https://firebolt.go.firebolt.io/signup). +2. Select the **Develop** icon (![The Firebolt Develop Space icon.](../../assets/images/develop-icon.png)) from the left navigation pane. +3. In the **Script Editor**, run a query on the table that you specified as an Estuary Flow destination to confirm the transfer of data as follows: + + 1. Select the name of the database that you specified as your Estuary Flow destination from the drop-down list next to **Databases**. + 2. Enter a script in the script editor to query the table that you specified as an Estuary Flow destination. The following code example returns the contents of all rows and all columns from the `games` table: + + ``` + SELECT * FROM games + ``` + + You’ve successfully set up an Estuary Flow pipeline to move data from an Amazon S3 source to a Firebolt destination. Next, explore the following resources to continue expanding your knowledge base. 
+ +## [](#additional-resources)Additional resources + +- Explore the [core concepts](https://docs.estuary.dev/concepts/) of Estuary Flow. +- Access [tutorials](https://docs.estuary.dev/getting-started/tutorials/) for Estuary Flow including a tutorial on [data transformation](https://docs.estuary.dev/guides/derivation_tutorial_sql/). +- Learn more about Estuary Flow’s [command line interface](https://docs.estuary.dev/concepts/flowctl/). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_integrations.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_integrations.md new file mode 100644 index 0000000..c2e5e03 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_integrations.md @@ -0,0 +1,16 @@ +# [](#integrate-with-firebolt)Integrate with Firebolt + +* * * + +- [Airflow](/Guides/integrations/airflow.html) +- [dbt](/Guides/integrations/connecting-with-dbt.html) +- [Apache Superset](/Guides/integrations/connecting-to-apache-superset.html) +- [Preset](/Guides/integrations/connecting-to-preset.html) +- [Cube.js](/Guides/integrations/cube-js.html) +- [Airbyte](/Guides/integrations/airbyte.html) +- [OpenTelemetry Exporter](/Guides/integrations/otel-exporter.html) +- [Tableau](/Guides/integrations/tableau.html) +- [Paradime](/Guides/integrations/connecting-to-paradime.html) +- [Metabase](/Guides/integrations/metabase.html) +- [Estuary](/Guides/integrations/estuary.html) +- [DBeaver](/Guides/integrations/dbeaver.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_metabase.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_metabase.md new file mode 100644 index 0000000..0ca8720 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_metabase.md @@ -0,0 +1,56 @@ +![Metabase](/assets/images/metabase.png) + +# [](#connecting-to-metabase)Connecting to Metabase + +[Metabase](https://www.metabase.com/) is an open-source business intelligence platform. 
You can use Metabase’s user interface to explore, analyze, and visualize data, query databases, generate reports, and create dashboards. + +This guide shows you how to [set up a Firebolt connector](#set-up-a-connector-to-metabase) for a self-hosted Metabase instance and how to [create a connection](#create-a-connection-to-metabase). If you are using either the managed or cloud-hosted version of [**Metabase Cloud**](https://www.metabase.com/docs/latest/cloud/start), you can skip directly to the [Create a Connection](#create-a-connection-to-metabase). + +You can also watch a short video on how to connect Metabase to Firebolt: + +**Topics:** + +1. [Set up a connector to metabase](#set-up-a-connector-to-metabase) +2. [Create a connection to metabase](#create-a-connection-to-metabase) +3. [Additional Resources](#additional-resources) + +### [](#set-up-a-connector-to-metabase)Set up a connector to metabase + +Metabase can be deployed as a **self-hosted instance**, which is a version that you install and manage on your own server infrastructure. If you are using either the managed or cloud-hosted version of [**Metabase Cloud**](https://www.metabase.com/docs/latest/cloud/start), you can skip directly to the [Create a Connection](#create-a-connection-to-metabase). + +For self-hosted deployments on-premises, the Firebolt connector must be installed manually using the following steps: + +1. **Download the Firebolt Metabase driver** + + - Go to the [GitHub Releases page for Firebolt](https://github.com/firebolt-db/metabase-firebolt-driver/releases). + - Locate the most recent version of the Firebolt driver, and download it. +2. **Move the driver file to the plugins directory** + + - Save the downloaded driver file in the `/plugins` directory on your Metabase host system. + - By default, the `/plugins` directory is located in the same folder where the `metabase.jar` file runs. 
After completing these steps, the Firebolt connector will be available for configuration within Metabase. + +### [](#create-a-connection-to-metabase)Create a connection to metabase + +After setting up the Firebolt connector, use the following steps to create a connection between Metabase and your Firebolt database: + +1. Open your Metabase instance’s home page in a web browser. +2. Select **Settings** from the top-right menu of the Metabase interface. +3. Select **Admin** from the dropdown menu. +4. On the **Admin** page, select **Databases** in the top navigation bar. +5. Select the **Add Database** button. +6. From the **Database Type** dropdown list, select **Firebolt**. + + Fill out the required connection details using the descriptions provided in the following table: + + Field Description **Display Name** A name to identify your database in Metabase. Use the same name as your Firebolt database for simplicity. **Client ID** The [service account ID](/Guides/managing-your-organization/service-accounts.html#get-a-service-account-id) associated with your Firebolt database. **Client Secret** The [secret for the service account](/Guides/managing-your-organization/service-accounts.html#generate-a-secret) associated with your Firebolt database. **Database name** Specify the name of the Firebolt database you want to connect to. **Account name** The name of your Firebolt account, which is required to log in and authenticate your database connection. **Engine name** Provide the name of the Firebolt engine that will be used to run queries against the database. **Additional JDBC options** Add any extra parameters needed for the connection, such as `connection_timeout_millis=10000`. For more options, access the [JDBC connection parameters guide](/Guides/developing-with-firebolt/connecting-with-jdbc.html#available-connection-parameters). +7. Select **Save** to store your database configuration. 
+
+Verify the connection by confirming that Metabase displays a success message indicating that your Firebolt database has been added successfully. If the connection fails, double-check your settings and ensure all required fields are correct.
+
+### [](#additional-resources)Additional Resources
+
+For more information about Metabase configuration and troubleshooting, refer to the following resources:
+
+- [**Adding and Managing Databases**](https://www.metabase.com/docs/latest/databases/connecting) — Official Metabase documentation on connecting to data sources and managing database connections.
+- [**Troubleshooting Database Connections**](https://www.metabase.com/docs/latest/troubleshooting-guide/db-connection) — Guidance on resolving issues when connecting [Metabase](https://www.metabase.com/docs/latest/databases/connecting) to your databases.
+- [**Troubleshooting Database Performance**](https://www.metabase.com/docs/latest/troubleshooting-guide/db-performance) — Tips for identifying and addressing performance issues with connected databases.
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_otel_exporter.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_otel_exporter.md
new file mode 100644
index 0000000..5a8d7cb
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_otel_exporter.md
@@ -0,0 +1,9 @@
+# [](#overview)Overview
+
+[OpenTelemetry](https://opentelemetry.io/) is a [CNCF](https://www.cncf.io/) project that provides a collection of APIs, SDKs, and tools to instrument, generate, collect, and export telemetry data (metrics, logs, and traces). In the past few years, this project has become the accepted standard for telemetry, with native support by all major vendors. As such, Firebolt provides an OpenTelemetry exporter which gives compatibility with minimal effort.
+
+Firebolt OpenTelemetry Exporter is provided as a Docker image, which allows exporting engine metrics to any [OTLP](https://opentelemetry.io/docs/specs/otel/protocol/) compatible collector. This makes it possible to integrate Firebolt runtime metrics into customer’s monitoring and alerting systems and to use homogeneous infrastructure for observability of the entire data stack.
+
+# [](#enabling-firebolt-opentelemetry-exporter)Enabling Firebolt OpenTelemetry Exporter
+
+For installation and usage instructions, see the [otel-exporter](https://github.com/firebolt-db/otel-exporter) repository on GitHub.
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/guides_integrations_tableau.md b/cmd/docs-scrapper/fireboltdocs/guides_integrations_tableau.md
new file mode 100644
index 0000000..6beef0d
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/guides_integrations_tableau.md
@@ -0,0 +1,79 @@
+# [](#integrate-with-tableau)Integrate with Tableau
+
+![Tableau icon](../../assets/images/Tableau.png)
+
+[Tableau](https://www.tableau.com/) is a visual analytics platform that empowers users to explore, analyze, and present data through interactive visualizations. It supports diverse use cases such as data exploration, reporting, and collaboration, and helps users gain insights and make informed decisions. This guide shows you how to set up your Firebolt account to integrate with [Tableau Desktop](https://www.tableau.com/products/desktop) and [Tableau Exchange](https://exchange.tableau.com).
+
+The latest Firebolt version is not compatible with Tableau Online, and you will not be able to connect it to your Firebolt account. You can only use the connector from Tableau Exchange with an older version of Firebolt. If you want to use the latest version, use Tableau Desktop or Tableau Server and follow the instructions below.
+ +## [](#prerequisites)Prerequisites + +You must have the following prerequisites before you can connect your Firebolt account to Tableau: + +- **Tableau account** – You must have access to an active Tableau account. If you do not have access, you can [sign up](https://www.tableau.com/products/trial) for one. +- **Firebolt account** – You need an active Firebolt account. If you do not have one, you can [sign up](https://go.firebolt.io/signup) for one. +- **Firebolt database and table** – You must have access to a Firebolt database that contains a table with data ready for visualization. If you don’t have access, you can [create a database](/Guides/getting-started/get-started-sql.html#create-a-database) and then [load data](/Guides/loading-data/loading-data.html) into it. +- **Firebolt service account** – You must have access to an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html), which facilitates programmatic access to Firebolt, its ID and secret. +- **Firebolt user** – You must have a user that is [associated](/Guides/managing-your-organization/service-accounts.html#create-a-user) with your service account. The user should have [USAGE](/Overview/Security/Role-Based%20Access%20Control/database-permissions/) permission to query your database, and [OPERATE](/Overview/Security/Role-Based%20Access%20Control/engine-permissions.html) permission to start and stop an engine if it is not already started. + +## [](#connect-to-tableau)Connect to Tableau + +To connect to Tableau, you must download a Firebolt connector, a [JDBC driver](/Guides/developing-with-firebolt/connecting-with-jdbc.html#jdbc-driver), connect to Firebolt, and select a database and schema to query. You can either install [Tableau Desktop](https://www.tableau.com/products/desktop) for individual use or [Tableau Server](https://www.tableau.com/products/server) for centralized access to dashboards on a shared server. + +1. **Download and install Tableau** + + 1. 
To download Tableau’s Desktop, navigate to Tableau’s Desktop [download page](https://www.tableau.com/en-gb/products/desktop/download), and follow the prompts to install the program. To use Tableau Server, follow Tableau’s instructions for [installation and configuration](https://help.tableau.com/current/server/en-us/install_config_top.htm). + 2. Follow the prompts to install Tableau. +2. **Download the latest Firebolt connector** + + Download the latest version of Firebolt’s Tableau connector from Firebolt’s GitHub [repository](https://github.com/firebolt-db/tableau-connector/releases). The earliest version of the driver that is compatible with the latest version of Firebolt is [v1.1.0](https://github.com/firebolt-db/tableau-connector/releases/tag/v1.1.0). The name of the file has the following format: `firebolt_connector-.taco`, and should be saved in a specific directory that depends on the operating system used as follows: + + For Tableau Desktop, save the file connector to: + + - Windows - `C:\Users\[Windows User]\Documents\My Tableau Repository\Connectors` + - MacOS - `/Users/[user]/Documents/My Tableau Repository/Connectors` + + For any other installations including Tableau Server and older versions of Tableau, follow the steps in the Tableau [guide](https://help.tableau.com/current/pro/desktop/en-us/examples_connector_sdk.htm#use-a-connector-built-with-tableau-connector-sdk). +3. **Download the latest JDBC driver** + + Download a JDBC driver, which will allow Tableau to interact with a Firebolt databases using Java, from Firebolt’s GitHub [repository](https://github.com/firebolt-db/jdbc/releases). The name of the file has the following format: `firebolt-jdbc-.jar`, and should be saved in a specific directory that depends on the operating system as follows: + + - Windows: `C:\Program Files\Tableau\Drivers` + - Mac: `/Users//Library/Tableau/Drivers` + - Linux: `/opt/tableau/tableau_driver/jdbc` +4. 
**Start Tableau and verify Firebolt connector availability** + + 1. Start your Tableau Desktop or Server. If you already started Tableau prior to downloading the drivers, restart Tableau. + 2. In the left navigation panel, under **To a Server**, select the `>` to the right of **More…**. + 3. Search for and select the **Firebolt by Firebolt Analytics Inc** connector in the search bar. + 4. In the left navigation panel, under **To a Server**, select the `>` to the right of **More…**. + 5. Select **Firebolt Connector by Firebolt**. + 6. Enter the following parameters in the **General** tab: + + **Field** **Required** **Description** **Host** No Most users should not enter a value in the text box under `Host`. **Account** Yes The name of your Firebolt account within your organization. **Engine Name** Yes The name of the [engine](/Overview/engine-fundamentals.html) to run queries. **Database** Yes The name of the Firebolt [database](/Overview/indexes/using-indexes.html#databases) to connect to. **Client ID** Yes The [ID of your service account](/Guides/managing-your-organization/service-accounts.html#get-a-service-account-id). **Client Secret** Yes The [secret](/Guides/managing-your-organization/service-accounts.html#generate-a-secret) for your service account authentication. + 7. Select **Sign in**. +5. **Choose the database and the schema to query** + + After successful authentication, **Database** and **Schema** drop-down lists appear in the left navigation pane under **Connections**. The database name from the previous step appears in the database drop-down list. To change the database, you must repeat the previous step and set up a new connector. + + Choose the schema and tables as follows: + + 1. Select the drop-down list under **Schema** to select a [schema](/Overview/indexes/using-indexes.html#schema). Most users should choose `public`. 
For more information about schema permissions and privileges, see [Schema permissions](/Overview/Security/Role-Based%20Access%20Control/database-permissions/schema-permissions.html). + 2. Drag and drop tables from the list of available tables in your schema to use them in Tableau. +6. **Visualize your data** + + Once your data source is selected you can begin visualizing the data by creating graphs and charts as follows: + + 1. Select `Sheet 1` tab from the bottom-left corner of your Tableau window next to **Data Source**. + 2. In the left navigation panel under **Sheets**, drag and drop any available columns or pre-defined aggregation from your table into the Tableau workspace to start building charts. See Tableau’s [Build a view from scratch](https://help.tableau.com/current/pro/desktop/en-us/getstarted_buildmanual_ex1basic.htm) documentation for more information. + +## [](#limitations)Limitations + +- Firebolt does not support [Tableau Cloud](https://www.tableau.com/products/cloud-bi). +- Once you have set up a connection to Firebolt, you cannot change the database that you specified during setup. In order to change the database, you must repeat step 4 to **Start Tableau and verify Firebolt connector availability** in [Connect to Tableau](#connect-to-tableau) to set up a new connection. + +## [](#additional-resources)Additional resources + +- Watch Tableau’s [free training videos](https://www.tableau.com/en-gb/learn/training) on getting started, preparing data, and geographical analysis. +- Read Tableau’s data visualization [articles](https://www.tableau.com/en-gb/learn/articles) about creating effective, engaging, and interactive examples. +- Follow Tableau’s [blog](https://www.tableau.com/en-gb/blog) for new features and tips. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_configuring_aws_role_to_access_amazon_s3.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_configuring_aws_role_to_access_amazon_s3.md new file mode 100644 index 0000000..8b9e010 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_configuring_aws_role_to_access_amazon_s3.md @@ -0,0 +1,163 @@ +# [](#use-aws-iam-roles-to-access-amazon-s3)Use AWS IAM roles to access Amazon S3 + +Firebolt uses AWS Identity and Access Management (IAM) permissions to load data from an Amazon S3 bucket into Firebolt. This requires you to set up permissions using the AWS Management Console. Specify credentials when you create an external table using one of the following options: + +- You can provide **Access Keys** associated with an IAM principal that has the required permissions. +- You can specify an **IAM role** that Firebolt assumes for the appropriate permissions. + +This guide explains how to create an AWS IAM permissions policy and an IAM role to grant Firebolt the necessary permissions to access and read data from an Amazon S3 bucket. + +1. [Create an IAM permissions policy in AWS](#create-an-iam-permissions-policy-in-aws) +2. [Create the IAM role in AWS](#create-the-iam-role-in-aws) +3. [How to specify the IAM role](#how-to-specify-the-iam-role) + + 1. [Specify the IAM role for data loading](#specify-the-iam-role-for-data-loading) + 2. [Specify the IAM role in `COPY FROM`](#specify-the-iam-role-in-copy-from) + 3. [Using IAM role in the Firebolt load data wizard](#using-iam-role-in-the-firebolt-load-data-wizard) + 4. [Using IAM role in external table definitions](#using-iam-role-in-external-table-definitions) + +## [](#create-an-iam-permissions-policy-in-aws)Create an IAM permissions policy in AWS + +01. Log in to the [AWS Identity and Access Management (IAM) Console](https://console.aws.amazon.com/iam/home#/home). +02. 
From the left navigation panel, under **Access management**, choose **Account settings**. +03. Under **Security Token Service (STS),** in the **Endpoints** list, find the **Region name** where your account is located. If the status is **Inactive**, choose **Activate**. +04. Choose **Policies** from the left navigation panel. +05. Select **Create Policy**. +06. Select the **JSON** tab. +07. Add a policy document that grants Firebolt access to the Amazon S3 bucket and folder. + + The following policy in JSON format provides Firebolt with the required permissions to unload data using a single bucket and folder path. Copy and paste the text into the policy editor. Replace `` and `` with the actual bucket name and path prefix. + + ``` + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:GetObjectVersion" + ], + "Resource": "arn:aws:s3::://*" + }, + { + "Effect": "Allow", + "Action": "s3:GetBucketLocation", + "Resource": "arn:aws:s3:::" + }, + { + "Effect": "Allow", + "Action": "s3:ListBucket", + "Resource": "arn:aws:s3:::", + "Condition": { + "StringLike": { + "s3:prefix": [ + "/*" + ] + } + } + } + ] + } + ``` + + - If you encounter the following error: `Access Denied (Status Code: 403; Error Code: AccessDenied)`, one possible fix may be to remove the following condition from the IAM policy: + + ``` + "Condition": { + "StringLike": { + "s3:prefix": [ + "/*" + ] + } + } + ``` +08. Select **Next** in the bottom-right corner of the workspace. +09. In the **Review and create** pane, under **Policy details**, enter the **Policy name**. For example, `_firebolt-s3-access_`. +10. Enter an optional **Description**. +11. Select the **Create policy** button in the bottom-right corner of the workspace. + +Setting the s3:prefix condition key to * grants access to **all** prefixes in the specified bucket for the associated action. 
+ +## [](#create-the-iam-role-in-aws)Create the IAM role in AWS + +To integrate Firebolt with AWS, you must create an IAM role and associate it with the permission policy that you created in the previous [Create an IAM permissions policy in AWS](#create-an-iam-permissions-policy-in-aws) section. The following steps guide you through creating an IAM role, configuring the required trust policy from the Firebolt Workspace, and associating it with your IAM permissions policy. Once completed, you can use the role’s Amazon Resource Name (ARN) in Firebolt’s `CREDENTIALS` clause to enable secure data ingestion. + +01. Log in to the [AWS Identity and Access Management (IAM) Console](https://console.aws.amazon.com/iam/home#/home). +02. Select **Roles** from the left navigation panel. +03. Select the **Create role** button in the top right part of the main window. +04. In the **Select trusted entity** window, select the radio button next to **Custom trust policy**. +05. A **Custom trust policy** window opens. Leave this window open until you obtain a custom trust policy from the Firebolt **Workspace** as follows: + + 1. Log in to the [Firebolt Workspace](https://go.firebolt.io/login). + 2. Select the plus (**+**) sign in Firebolt’s **Develop Space**. + 3. Select **Load data** from the drop-down list. + 4. Select an engine from the drop-down list next to **Select engine for ingestion**. If you do not have an engine, select **Create new engine** to create one. + 5. Select the **Next step** button. + 6. Select the radio button next to **IAM Role** in the **Authentication method** row. + 7. Select the **Create an IAM role** button. + 8. In the **Create new IAM role** window that pops up, select the copy icon under **Trust policy** to copy the entire trust policy to your clipboard. + 9. Return to the AWS **Custom trust policy** window from step 5. +06. Replace the entire contents of the **Custom trust policy** with the contents of your clipboard from the Firebolt **Workspace**. 
+07. Select the **Next** button in the bottom right part of the main window. +08. Under **Permissions policies** enter the name of and select the checkbox next to the policy you created in step 9 of the previous section [create an IAM permissions policy in AWS](#create-an-iam-permissions-policy-in-aws). +09. Select the **Next** button in the bottom right part of the main window. +10. Under **Role name**, enter a name that you can use to identify it. +11. Select the **Create role** button in the bottom right part of the main window. +12. Under **Role name**, select the name of the role you created in step 10. +13. Copy the value under **ARN**. This value has the following format: `arn:aws:iam::123456789012:role/your_role_name`. Use the ARN value in the Firebolt `CREDENTIALS` clause as the `AWS_ROLE_ARN`, as shown in the following sections. + +Once you’ve created your IAM policy and associated it with your IAM role, you’re ready to load data into Firebolt using IAM roles. Firebolt assumes the IAM role to securely access and read data from your Amazon S3 bucket. + +## [](#how-to-specify-the-iam-role)How to specify the IAM role + +Firebolt supports AWS IAM roles for secure access to Amazon S3 when loading data. You can specify an IAM role in different ways, including in the `COPY FROM` statement, the Firebolt **Load Data** wizard, or an external table definition. The following sections explain how to configure IAM roles for each method. + +### [](#specify-the-iam-role-for-data-loading)Specify the IAM role for data loading + +When loading data into Firebolt, specify the IAM role ARN from the previous step to grant the necessary permissions. If you configured an external ID, ensure it is included along with the role ARN. The following sections show you how to load data into Firebolt using AWS IAM roles to access your storage bucket. 
+ +### [](#specify-the-iam-role-in-copy-from)Specify the IAM role in `COPY FROM` + +Use the IAM role ARN from the previous step in the [CREDENTIALS](/sql_reference/commands/data-management/copy-from.html) of the `COPY FROM` statement. If you specified an external ID, make sure to specify it in addition to the role ARN. When you use the `COPY FROM` statement to load data from your source, Firebolt assumes the IAM role to obtain permissions to read from the location specified in the `COPY FROM` statement. + +For a step-by-step guide, see [The simplest COPY FROM workflow](/Guides/loading-data/loading-data-sql.html#the-simplest-copy-from-workflow). + +**Example** + +The following code example loads data from a CSV file in an Amazon S3 bucket into the `tutorial` table in Firebolt, using an AWS IAM role for authentication, treating the first row as a header, and automatically creating the table if it does not exist: + +``` +COPY INTO tutorial +FROM 's3://your_s3_bucket/your_file.csv' +WITH +CREDENTIALS = ( + AWS_ROLE_ARN='arn:aws:iam::123456789012:role/my-firebolt-role' + AWS_EXTERNAL_ID='ca4f5690-4fdf-4684-9d1c-2d5f9fabc4c9' +) +HEADER=TRUE AUTO_CREATE=TRUE; +``` + +### [](#using-iam-role-in-the-firebolt-load-data-wizard)Using IAM role in the Firebolt load data wizard + +You can use the role ARN from the previous step when loading data using the **Load data** wizard in the **Firebolt Workspace**. For a step-by-step guide, see [Load data using a wizard](/Guides/loading-data/loading-data-wizard.html). + +### [](#using-iam-role-in-external-table-definitions)Using IAM role in external table definitions + +Specify the IAM role ARN and the optional `external_id` in the [`CREDENTIALS`](/sql_reference/commands/data-definition/create-external-table.html) of the `CREATE EXTERNAL TABLE` statement. Firebolt assumes this IAM role when using an `INSERT INTO` statement to load data into a fact or dimension table. 
+ +**Example** + +The following code example creates an external table which maps to Parquet files stored in an Amazon S3 bucket, using an AWS IAM role for access, and extracts partition values for `c_type` from the file path based on a specified regex pattern: + +``` +CREATE EXTERNAL TABLE my_ext_table ( + c_id INTEGER, + c_name TEXT, + c_type TEXT PARTITION('[^/]+/c_type=([^/]+)/[^/]+/[^/]+') +) +CREDENTIALS = (AWS_ROLE_ARN='arn:aws:iam::123456789012:role/my-firebolt-role' AWS_ROLE_EXTERNAL_ID='ca4f5690-4fdf-4684-9d1c-2d5f9fabc4c9') +URL = 's3://my_bucket/' +OBJECT_PATTERN= '*.parquet' +TYPE = (PARQUET) +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_creating_access_keys_aws.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_creating_access_keys_aws.md new file mode 100644 index 0000000..df8d12d --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_creating_access_keys_aws.md @@ -0,0 +1,107 @@ +# [](#creating-an-access-key-and-secret-id-in-aws)Creating an access key and secret ID in AWS + +This section will walk you through the steps to create security credentials in AWS. These credentials will be used to load data from AWS S3 into Firebolt. + +In order to enable Firebolt to load data from your S3 buckets, you must: + +1. Create a user. +2. Create appropriate permissions for this user. +3. Create access credentials to authenticate this user. + +## [](#create-a-user)Create a user + +1. Log into your AWS Management console and go to the IAM section. You can do this by typing “IAM” in the search bar. +2. Once you are in the IAM section, select the **Create User** button. + + ![Create IAM User](/assets/images/Create_User_Dialog.png) +3. Enter a name for the user and select **Next**. + + ![Specify User Name](/assets/images/Specify_User_Name.png) +4. You can have the default permission option set to **Add user to group** and select **Next**. 
+ + ![Set Permissions](/assets/images/Set_Permissions.png) +5. Select **Create User**. + + ![Review and Create User](/assets/images/Review_Create_User.png) +6. You will see a message **User created successfully**. + + ![User created successfully](/assets/images/User_Created_Successfully.png) + +## [](#create-s3-access-permissions)Create S3 access permissions + +Now that you have created the user, you will now assign this user appropriate permissions for S3. + +1. Select on the user name as shown below. + + ![Click User](/assets/images/Click_User.png) +2. In the Permissions tab, select the **Add Permissions** drop-down and choose **Create inline policy**. + + ![Choose Inline Policy](/assets/images/Choose_Iniline_Permissions.png) +3. In **Specify Permissions** choose S3 as the service. + + ![Choose S3](/assets/images/Choose_S3.png) +4. Choose **JSON**, paste the following JSON code in the policy editor, and select **Next**. + + ![Set Permissions](/assets/images/Specify_Permissions.png) + + ``` + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": [ + "s3:GetObject", + "s3:GetObjectVersion" + ], + "Resource": "arn:aws:s3::://*" + }, + { + "Effect": "Allow", + "Action": "s3:GetBucketLocation", + "Resource": "arn:aws:s3:::" + }, + { + "Effect": "Allow", + "Action": "s3:PutObject", + "Resource": "arn:aws:s3:::/*" + }, + { + "Effect": "Allow", + "Action": "s3:ListBucket", + "Resource": "arn:aws:s3:::", + "Condition": { + "StringLike": { + "s3:prefix": [ + "/*" + ] + } + } + } + ] + } + ``` + + **IMPORTANT:** Replace “<bucket>” with the S3 bucket that you want to provide access to. +5. Enter a description for the policy and select **Create Policy**. + + ![Create policy](/assets/images/Create_Policy.png) +6. You will see a message that the policy has been successfully created. 
+ +## [](#create-access-key-and-secret-id)Create access key and secret ID + +Now that you have created a user, authorized the user with the appropriate S3 permissions, you will create access credentials for this user. These credentials will be used to authenticate the user. + +1. Select the **Security Credentials** tab, as shown in the following image: + + ![Security Credentials](/assets/images/Choose_Security_Credentials.png) +2. In the **Access Keys** section, select the **Create Access Key** button. + + ![Create Access Key](/assets/images/Create_Access_Keys.png) +3. For the use case, choose the **Application running on AWS compute service**. You will see an alternative recommendation. You can check the box that says “I understand the above recommendation and want to proceed to create an access key” and select **Next**. + + ![Use case warning](/assets/images/Access_Key_Use_Case.png) +4. Set a description tag for the access key and select \*\*C, ![Set Desc tag Access Key](/assets/images/Description_Tag_Access_Key.png) +5. You will see a message indicating that the access key was created. Make sure to download the access key. You will need these credentials when you load S3 data into Firebolt. + + ![Access Key created successfully](/assets/images/Download_CSV_Access_Key.png) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data.md new file mode 100644 index 0000000..c88334b --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data.md @@ -0,0 +1,48 @@ +# [](#load-data)Load data + +You can load data into Firebolt from an Amazon S3 bucket using two different workflows. + +If you want to get started quickly, load data using a **wizard** in the **Firebolt Workspace**. If you want a more customized experience, you can write **SQL scripts** to handle each part of your workflow. 
This guide shows you how to load data using both the wizard and SQL, and some common data loading workflows and errors. + +![You can use either the load data wizard or SQL to create a database, engine, and then load data.](../../assets/images/load_data_workflow.png) + +Before you can load data, you must first register with Firebolt, then create a database and an engine. For information about how to register, see [Get Started](../../Guides/getting-started/). See the following sections for information about how to create a database and engine. + +## [](#load-data-using-a-wizard)Load data using a wizard + +You can use the **Load data** wizard in the **Firebolt Workspace** to load data in either CSV or Parquet format, and choose from a variety of different loading parameters which include the following: + +- Specifying a custom delimiter, quote character, escape character, and other options. +- How to handle errors during data load. +- Specifying a primary index. + +The **Load data** wizard guides you through the process of creating an engine and database as part of the loading process. + +See [Load data using a wizard](/Guides/loading-data/loading-data-wizard.html) for information about the options available in the **Load data** wizard. + +## [](#load-data-using-sql)Load data using SQL + +You can use SQL to load data in CSV, Parquet, TSV, AVRO, JSON Lines or ORC formats. Prior to loading data, you must also create a database and engine using either of the following options: + +- Use buttons in the **Firebolt Workspace** to create a database and engine. For more information, see the [Create a Database](/Guides/getting-started/get-started-sql.html#create-a-database) and [Create an Engine](/Guides/getting-started/get-started-sql.html#create-an-engine) sections in the [Get Started using SQL](/Guides/getting-started/get-started-sql.html) guide. 
+- Use the SQL commands [CREATE DATABASE](/sql_reference/commands/data-definition/create-database.html) and [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html). + +See [SQL to load data](/Guides/loading-data/loading-data-sql.html) for information and code examples to load data using SQL. + +## [](#optimizing-during-data-loading)Optimizing during data loading + +Optimizing your workflow for Firebolt starts when you load your data. Use the following guidance: + +1. A primary index is a sparse index that uniquely identifies rows in a table. Having a primary index is critical to query performance at Firebolt because it allows a query to locate data without scanning an entire dataset. If you are familiar with your data and query history well enough to select an optimal primary index, you can define it when creating a table. If you don’t, you can still load your data without a primary index. Then, once you know your query history patterns, you must create a new table in order to define a primary index. + + You can specify primary indexes in either the **Load data** wizard or inside SQL commands. The [Load data using a wizard](/Guides/loading-data/loading-data-wizard.html) guide discusses considerations for selecting and how to select primary indexes. The [Load data using SQL](/Guides/loading-data/loading-data-sql.html) discusses considerations for selecting and shows code examples that select primary indexes. For more advanced information, see [Primary indexes](/Overview/indexes/primary-index.html). +2. If you intend to use [aggregate functions](/sql_reference/functions-reference/aggregation/) in queries, you can calculate an aggregating index when loading your data. Then queries use these pre-calculated values to access information quickly. For an example of calculating an aggregating index during load, see [Load data using SQL](/Guides/loading-data/loading-data-sql.html). 
For an introduction to aggregating indexes, see the [Aggregating indexes](/Guides/getting-started/get-started-sql.html#aggregating-indexes) section of the **Get Started** guide. For more information, see [Aggregating indexes](/Overview/indexes/aggregating-index.html). + +## [](#next-steps)Next steps + +After you load your data, you can start running and optimizing your queries. A typical workflow has the previous steps followed by data and resource cleanup as shown in the following diagram: + +![The load data workflow includes using the load data wizard or SQL to create a database, engine, and then load data.](../../assets/images/get_started_workflow.png) + +- [Load data using a wizard](/Guides/loading-data/loading-data-wizard.html) +- [Load data using SQL](/Guides/loading-data/loading-data-sql.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_sql.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_sql.md new file mode 100644 index 0000000..04c1dde --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_sql.md @@ -0,0 +1,497 @@ +# [](#load-data-using-sql)Load data using SQL + +If the **Load data** wizard does not meet your needs, or you prefer to write directly in SQL, you can enter SQL and run it in the **Firebolt Workspace**, or use an API. + +Before you can load data using a SQL script, you must register with Firebolt, and create a database and an engine. + +A general workflow to load data using SQL is shown in the following diagram, with the highlighted path representing the SQL workflow and the muted path representing using the **Load data** wizard: + +![You can use either the load data wizard or SQL to create a database, engine, and then load data.](../../assets/images/load_data_sql_workflow.png) + +For more information on how to register, create a database and engine using the **Firebolt Workspace**, see the [Get Started](/Guides/getting-started/) guide. 
To create an engine using SQL, use [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html). You can check how many engines are defined in your current account using [SHOW ENGINES](/sql_reference/commands/metadata/show-engines.html). For more information and examples of how to create engines, see [Work with engines using DDL](/Guides/operate-engines/working-with-engines-using-ddl.html). To create a database, use [CREATE DATABASE](/sql_reference/commands/data-definition/create-database.html). You can check how many databases (i.e., catalogs) are defined in your current account using [SHOW CATALOGS](/sql_reference/commands/metadata/show-catalogs.html). Next, log into the **Firebolt Workspace** and enter SQL into the script tab in the **SQL Editor**. + +The following code examples show different workflows based on need and complexity: + +- [The simplest COPY FROM workflow](#the-simplest-copy-from-workflow) +- [Define a schema, create a table, and load data](#define-a-schema-create-a-table-and-load-data) +- [Load multiple files into a table](#load-multiple-files-into-a-table) +- [Filter data before loading using OFFSET and LIMIT](#filter-data-before-loading-using-offset-and-limit) +- [Aggregating data during data load](#aggregating-data-during-data-load) +- [Update an existing table from an external table](#update-an-existing-table-from-an-external-table) +- [Load source file metadata into a table](#load-source-file-metadata-into-a-table) +- [Continue loading even with errors](#continue-loading-even-with-errors) +- [Log errors during data load](#log-errors-during-data-load) + +## [](#the-simplest-copy-from-workflow)The simplest COPY FROM workflow + +Although there are many options to handle different data loading workflows, `COPY FROM` requires only two parameters: + +1. The name of the table that you are loading data into. +2. A location to load the data from. 
+ +An example of the **simplest** way to invoke `COPY FROM` is: + +``` +COPY INTO tutorial FROM +'s3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' WITH HEADER=TRUE; +``` + +The previous code creates a table named `tutorial`, reads a CSV file with headers from a public Amazon S3 bucket, automatically generates a schema, and loads the data. + +If the data is contained in an Amazon S3 bucket with restricted access, you will need to provide credentials. The following example shows how to provide credentials and read a file with headers, and automatically generate a schema: + +``` +COPY INTO tutorial +FROM 's3://your_s3_bucket/your_file.csv' +WITH +CREDENTIALS = ( + AWS_ROLE_ARN='arn:aws:iam::123456789012:role/my-firebolt-role' +) +HEADER=TRUE AUTO_CREATE=TRUE; +``` + +Firebolt supports authentication using both permanent AWS access keys and temporary security credentials obtained through Amazon’s [AssumeRole](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html) feature. To provide your credentials for the previous example, follow these steps: + +#### [](#static-credentials)Static Credentials + +Replace `` with an AWS access key ID associated with an AWS user or IAM role. The access key ID is a 20-character string (e.g., AKIAIOSFODNN7EXAMPLE). Replace `` with the AWS secret access key associated with the AWS user or IAM role. The secret access key is a 40-character string (e.g., wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY). You can also specify an `AWS_SESSION_TOKEN`. + +**Example:** + +``` +COPY INTO tutorial +FROM 's3://test-bucket/data.csv' +WITH +CREDENTIALS = ( + AWS_ACCESS_KEY_ID = 'AKIAIOSFODNN7EXAMPLE' AWS_SECRET_ACCESS_KEY = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' +) +``` + +#### [](#assume-role-authentication)Assume Role Authentication + +Replace with your role's Amazon Resource Name (ARN) of the IAM role that you want Firebolt to assume. 
This method gives Firebolt temporary credentials to authenticate and access your Amazon S3 bucket. + +**Example:** + +``` +COPY INTO tutorial +FROM 's3://test-bucket/data.csv' +WITH +CREDENTIALS = ( + AWS_ROLE_ARN='arn:aws:iam::746669185839:role/example' +) +``` + +## [](#define-a-schema-create-a-table-and-load-data)Define a schema, create a table, and load data + +You can also load data into an existing table using your own schema definition. Manually defining your own schema, can give you finer control over data ingestion. This example contains the following two steps: + +1. Create the target table. + + Create a table to load the data into, as shown in the following code example: + + ``` + CREATE TABLE IF NOT EXISTS levels ( + LevelID INT, + Name TEXT, + GameID INT, + LevelType TEXT, + MaxPoints INT, + PointsPerLap DOUBLE, + SceneDetails TEXT + ); + ``` + + The previous code example creates a table named `levels`, and defines each of the columns with a name and data type. For more information about the data types that Firebolt supports, see [Data types](/sql_reference/data-types.html). +2. Run COPY FROM. + + Use COPY FROM to load the data from an Amazon S3 bucket into the levels table, as shown in the following code example: + + ``` + COPY INTO levels + FROM 's3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' + WITH TYPE = CSV + HEADER = TRUE; + ``` + + The previous code example reads data from a Firebolt test data set from the fictional [Ultra Fast Gaming Inc.](https://help.firebolt.io/t/ultra-fast-gaming-firebolt-sample-dataset/250) company. The `levels` data set is in CSV format, but you can also use `COPY FROM` to read files in `Parquet` format. If you are reading in a `CSV` file and specify `HEADER = TRUE`, then Firebolt expects the first line of your file to contain column names. 
+
+## [](#load-multiple-files-into-a-table)Load multiple files into a table
+
+You can use the `PATTERN` option in `COPY FROM` to load several files at the same time from an Amazon S3 bucket. The `PATTERN` option uses standard glob patterns. For more information about glob patterns, see the Wikipedia [glob programming](https://en.wikipedia.org/wiki/Glob_(programming)) article.
+
+```
+COPY INTO nyc_restaurant_inspections FROM
+'s3://firebolt-sample-datasets-public-us-east-1/nyc_sample_datasets/nyc_restaurant_inspections/parquet/'
+WITH PATTERN="*.parquet" AUTO_CREATE=TRUE TYPE=PARQUET;
+```
+
+In the previous code example, the following apply:
+
+- **COPY INTO**: Specifies the target table to load the data into.
+- **FROM**: Specifies the S3 bucket location of the data.
+- **WITH PATTERN**= “\*.parquet”: Uses a glob pattern with wildcards (\*) to include all Parquet files in the directory.
+- **AUTO\_CREATE=TRUE**: Automatically creates the table and the schema if the table does not already exist. Parquet files include rich data, and typically have schema information for simple and high-fidelity schema creation. Setting AUTO\_CREATE to TRUE ensures the schema in the Parquet file is preserved after loading.
+- **TYPE = PARQUET**: Specifies the data format as Parquet.
+
+## [](#filter-data-before-loading-using-offset-and-limit)Filter data before loading using OFFSET and LIMIT
+
+You can use `COPY FROM` with the `LIMIT` and `OFFSET` clauses to filter out data before you load it into Firebolt. The following example demonstrates how to filter the source data by skipping the first five rows of data and inserting only the next three rows. 
+ +``` +COPY offset_limit +FROM 's3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' +OFFSET 5 LIMIT 3 +WITH TYPE = CSV HEADER = TRUE; +``` + +In the previous code example, the following apply: + +- **OFFSET**: Specifies a non-negative number of rows that are skipped before returning results from the query. +- **LIMIT**: Restricts the number of rows that are included in the result set. +- **TYPE = CSV**: Specifies the data format as CSV. +- **HEADER**: Specifies that the first row of the source file contains column headers. + +For more information about `OFFSET` and `LIMIT`, see [SELECT Query Syntax](/sql_reference/commands/queries/select.html). + +## [](#aggregating-data-during-data-load)Aggregating data during data load + +If you frequently use [aggregation functions](/sql_reference/functions-reference/aggregation/) such as `COUNT`, `MAX`, or `SUM`, you can perform these aggregations on top of an external table without loading the raw data into Firebolt. This approach allows you to avoid costs associated with importing and storing the dataset, particularly if you don’t need to store the originating data set. + +The following example shows how to aggregate data using an [external table](/Guides/loading-data/working-with-external-tables.html). Then, define a table in the Firebolt database with the desired aggregations. Finally, insert data from the external table into the internal table. This example contains the following three steps: + +1. Create an external table linked to files in an Amazon S3 bucket. + + The following code creates an external table that links to files in Amazon S3 bucket. 
The table has a defined schema that matches the type and names of the originating data: + + ``` + CREATE EXTERNAL TABLE IF NOT EXISTS ex_playstats ( + GameID INTEGER, + PlayerID INTEGER, + StatTime TIMESTAMPNTZ, + SelectedCar TEXT, + CurrentLevel INTEGER, + CurrentSpeed REAL, + CurrentPlayTime BIGINT, + CurrentScore BIGINT, + Event TEXT, + ErrorCode TEXT, + TournamentID INTEGER) + URL = 's3://firebolt-sample-datasets-public-us-east-1/gaming/parquet/playstats/' + OBJECT_PATTERN = '*' + TYPE = (PARQUET); + ``` + + The previous code uses `OBJECT_PATTERN` to link all (\*) files inside the specified directory contained in `URL`, and `TYPE` to specify the file format. +2. Define a table in the Firebolt database with the desired aggregations, as shown in the following code example: + + ``` + CREATE TABLE IF NOT EXISTS playstats_max_scores ( + PlayerID INTEGER, + TournamentID INTEGER, + MaxCurrentLevel INTEGER, + MaxCurrentSpeed REAL, + MaxCurrentScore BIGINT + ) PRIMARY INDEX TournamentID, PlayerID; + ``` + + The previous code creates a table with the aggregate values `MaxCurrentLevel`, `MaxCurrentSpeed`, and `MaxCurrentScore`. +3. Insert data from the external table into the internal table using the aggregate functions, as shown in the following code example: + +``` + INSERT INTO playstats_max_scores + SELECT PlayerID, + TournamentID, + MAX(CurrentLevel), + MAX(CurrentSpeed), + MAX(CurrentScore) + FROM ex_playstats + GROUP BY ALL +``` + +The previous code calculates the aggregate function `MAX` before loading the data into the `playstats_max_scores` table. + +## [](#update-an-existing-table-from-an-external-table)Update an existing table from an external table + +Firebolt saves metadata including virtual columns, and the source file’s name, size and timestamp when mapping data from an Amazon S3 bucket to a Firebolt database. You can query this metadata directly for troubleshooting and analysis, or use it to find new data, as shown in this example. 
+ +To load only new and updated data from an Amazon S3 bucket into an existing table, use an external table and two temporary tables. This section guides you through creating a new table, which will serve as the existing table in a complete example. If you already have an existing table, its schema definition must include the file timestamp and file name metadata. For more information about these metadata columns, see **Using metadata virtual columns** in [Work with external tables](/Guides/loading-data/working-with-external-tables.html). + +The full workflow involves creating an internal source data table, an external table linked to the source data, and two temporary tables for the latest timestamp and updated data. The `updates_table` selects new data and uses an inner join to insert these records into your existing table, as illustrated in the diagram below: + +![Use an external table and two temporary tables to update a main internal table by timestamp.](../../assets/images/workflow_update_from_external_table.png) + +This example contains the following nine steps: + +1. Create a table + + The following code example shows you how to create a `players` table from a sample players dataset, and then copy data from a parquet file in an Amazon S3 bucket into it: + + ``` + CREATE TABLE IF NOT EXISTS + players ( + PlayerID INTEGER, + Nickname TEXT, + Email TEXT, + AgeCategory TEXT, + Platforms ARRAY (TEXT NULL), + RegisteredOn DATE, + IsSubscribedToNewsletter BOOLEAN, + InternalProbabilityToWin DOUBLE PRECISION, + SOURCE_FILE_NAME TEXT, + SOURCE_FILE_TIMESTAMP TIMESTAMPNTZ) + PRIMARY INDEX agecategory, registeredon; + ``` + + The previous code example defines the schema for the players table, which includes the [metadata columns](/Guides/loading-data/working-with-external-tables.html) `SOURCE_FILE_NAME` and `SOURCE_FILE_TIMESTAMP`. +2. Create an external table + + Use an external table to query the source data directly to compare it to data in your existing table. 
The advantages of using an external table to check for new data are as follows: + + - An external table links to the data source without loading it into a database, which avoids costs associated with importing and storing it. + - Using an external table isolates data operations to reduce the risk of corrupting data contained in the main `players` table. + + The following code example creates an external players\_ext table linked to the source data: + + ``` + CREATE EXTERNAL TABLE IF NOT EXISTS + players_ext ( + PlayerID INTEGER, + Nickname TEXT, + Email TEXT, + AgeCategory TEXT, + Platforms ARRAY (TEXT NULL), + RegisteredOn PGDATE, + IsSubscribedToNewsletter BOOLEAN, + InternalProbabilityToWin DOUBLE PRECISION) + URL = 's3://firebolt-sample-datasets-public-us-east-1/gaming/parquet/players/' + OBJECT_PATTERN = '*' + TYPE = (PARQUET); + ``` + + The previous code example defines the schema for parquet data and links to all parquet files in the Amazon S3 bucket that contains the source data. + + If you are using an external table to link to data in parquet format, the order of the columns in the external table does not have to match the order of the columns in the source data. If you are reading data in csv format, the order must match the order in the source data. +3. Copy data from the `players_ext` external table into an internal `players` table, as shown in the following code example: + + ``` + INSERT INTO players + SELECT *, + $SOURCE_FILE_NAME, + $SOURCE_FILE_TIMESTAMP + FROM players_ext + ``` +4. Create a temporary table that contains the most recent timestamp from your existing table, as shown in the following code example: + + ``` + CREATE TABLE IF NOT EXISTS control_maxdate AS ( + SELECT MAX(source_file_timestamp) AS max_time + FROM players + ); + ``` + + The previous code example uses an aggregate function `MAX` to select the most recent timestamp from the existing `players` table. +5. 
Create a temporary table to select and store data that has a newer timestamp than that contained in the control\_maxdate table, as shown in the following code example: + + ``` + CREATE TABLE IF NOT EXISTS updates_table AS ( + WITH external_table AS ( + SELECT *, + $SOURCE_FILE_NAME AS source_file_name_new, + $SOURCE_FILE_TIMESTAMP AS source_file_timestamp_new, + FROM players_ext + WHERE $source_file_timestamp > (SELECT max_time FROM control_maxdate) + AND playerid IN (SELECT DISTINCT playerid FROM players) + ) + SELECT + e.* + FROM players f + INNER JOIN external_table e + ON f.playerid = e.playerid + ); + ``` + + The previous code example creates an `updates_table` using a `SELECT` statement to filter out data that is older than the previously recorded timestamp. The code includes a table alias `e`, which refers to `external_table`, and the table alias `f`, which refers to the `players` table. The `INNER JOIN` uses `playerid` to match rows in the external table to those in the `player` table, and then updates the `players` table. +6. Delete records from the original players table that have been updated, based on matching player IDs in the `updates_table`, as shown in the following code example: + + ``` + DELETE FROM players + WHERE playerid IN (SELECT playerid FROM updates_table); + ``` +7. Insert updated records from the `updates_table`, including a new timestamp, into the `players` table to replace the deleted records from the previous step, as shown in the following example: + + ``` + INSERT INTO players + SELECT + playerid, + nickname, + email, + agecategory, + platforms, + registeredon, + issubscribedtonewsletter, + internalprobabilitytowin, + source_file_name_new, + source_file_timestamp_new + FROM updates_table; + ``` +8. 
Insert any entirely new, rather than updated, records into the `players` table, as shown in the following code example: + + ``` + INSERT INTO players + SELECT *, + $SOURCE_FILE_NAME, + $SOURCE_FILE_TIMESTAMP + FROM players_ext + WHERE $SOURCE_FILE_TIMESTAMP > (SELECT max_time FROM control_maxdate) + AND playerid NOT IN (SELECT playerid FROM players); + ``` +9. Clean up resources. Remove the temporary tables used in the update process as shown in the following code example: + + ``` + DROP TABLE IF EXISTS control_maxdate; + DROP TABLE IF EXISTS updates_table; + ``` + +## [](#load-source-file-metadata-into-a-table)Load source file metadata into a table + +When you load data from an Amazon S3 bucket, Firebolt uses an external table which holds metadata about your source file to map into a Firebolt database. You can load metadata from the virtual columns contained in the external file into a table. You can use the name, timestamp and file size to determine the source of a row of data in a table. When adding data to an existing table, you can use this information to check whether new data is available, or to determine the vintage of the data. The external table associated with your source file contains the following fields: + +- `source_file_name` - the name of your source file. +- `source_file_timestamp` - the date that your source file was modified in the Amazon S3 bucket that it was read from. +- `source_file_size` - the size of your source file in bytes. 
+ +The following code example shows you how to create and load metadata into a `levels_meta` table, which contains only the metadata: + +``` + CREATE TABLE levels_meta ( + day_of_creation date, + name_of_file text, + size_of_file int); + COPY levels_meta( + day_of_creation $source_file_timestamp, + name_of_file $source_file_name, + size_of_file $source_file_size) + FROM 's3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' + WITH AUTO_CREATE = TRUE + HEADER = TRUE + TYPE = CSV; +``` + +The following code shows you how to read in the source data from the Amazon S3 bucket and add the metadata as new columns in that table: + +``` +CREATE TABLE IF NOT EXISTS levels_meta_plus ( + "LevelID" INT, + "Name" TEXT, + "GameID" INT, + "LevelType" TEXT, + "MaxPoints" INT, + "PointsPerLap" DOUBLE, + "SceneDetails" TEXT, + day_of_creation date, + name_of_file text, + size_of_file int +); + +COPY INTO levels_meta_plus ( + "LevelID", + "GameID", + "Name", + "LevelType", + "MaxPoints", + "PointsPerLap", + "SceneDetails", + day_of_creation $source_file_timestamp, + name_of_file $source_file_name, + size_of_file $source_file_size +) +FROM 's3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' +WITH +HEADER = TRUE +TYPE = CSV; +``` + +For more information about metadata, see **Using metadata virtual columns** in [Work with external tables](/Guides/loading-data/working-with-external-tables.html). + +## [](#continue-loading-even-with-errors)Continue loading even with errors + +By default, if Firebolt runs into an error when loading your data, the job will stop loading and end in error. If you want to continue loading your data even in the presence of errors, set `MAX_ERRORS_PER_FILE` to a percentage or integer larger than `0`. `COPY FROM` will then continue to load data until it exceeds the specified percent based on the total number of rows in your data. 
If you enter an integer between `0` and `100`, `COPY FROM` will interpret the integer as a percentage of rows. You can specify only `0%` or `100%`. + +For example, if `MAX_ERRORS_PER_FILE` is set to `0` or `0%`, `COPY FROM` will load data until one row has an error, and then return an error. Setting `MAX_ERRORS_PER_FILE` to either `100` or `100%` allows the loading process to continue even if every row has an error. If all rows have errors, no data will load into the target table. + +The following code example loads a sample CSV data set with headers, and will finish the loading job even if every row contains an error. + +``` +COPY INTO new_levels_auto +FROM 's3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/levels.csv' +WITH AUTO_CREATE = TRUE +HEADER = TRUE +TYPE = CSV +MAX_ERRORS_PER_FILE = '100%'; +``` + +In the previous code example, the following apply: + +- `COPY INTO new_levels_auto`: Creates a new table named `new_levels_auto`. The `INTO` clause is optional. If the table already exists, `COPY FROM` will add the rows to the existing table. +- `FROM`: Specifies the S3 bucket location of the data. In this example, the dataset is located in a publicly accessible bucket, so you do not need to provide credentials. +- `AUTO_CREATE=TRUE`: Creates a target table and automatically infers the schema. +- `HEADER=TRUE`: Specifies that the first row of the source file contains column headers. +- `TYPE`: Specifies the data format of the incoming data. +- `MAX_ERRORS_PER_FILE`: Specified as an integer or literal text. In the previous example, `MAX_ERRORS_PER_FILE` uses text. + +## [](#log-errors-during-data-load)Log errors during data load + +`COPY FROM` supports an option to generate error files that describe the errors encountered and note the rows with errors. To store these files in an Amazon S3 bucket, you must provide credentials to allow Firebolt to write to the bucket. 
+ +The following example sets an error handling threshold and specifies an Amazon S3 bucket as the source data and another to write the error file: + +``` +COPY INTO my_table +FROM 's3://my-bucket/data.csv' +WITH +CREDENTIALS = ( + AWS_ROLE_ARN='arn:aws:iam::123456789012:role/my-firebolt-role' +) +MAX_ERRORS_PER_FILE = '100%' +ERROR_FILE = 's3://my-bucket/error_logs/' +ERROR_FILE_CREDENTIALS = ( + AWS_ROLE_ARN='arn:aws:iam::123456789012:role/my-firebolt-role' +) +HEADER = TRUE +``` + +In the previous code example, the following apply: + +- `COPY INTO`: Specifies the target table to load the data into. +- `FROM`: Specifies the S3 bucket location of the data. +- `CREDENTIALS`: Specifies AWS credentials to access information in the Amazon S3 bucket that contains the source data. AWS [AssumeRole](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html) authentication is used for dynamic, temporary credentials. For more Information about credentials and how to set them up, see [The simplest COPY FROM workflow](/Guides/loading-data/loading-data-sql.html#the-simplest-copy-from-workflow). +- Error Handling: + + - `MAX_ERRORS_PER_FILE = ‘100%’`: Allows errors in up to `100%` of the rows per file before the load data job fails. + - `ERROR_FILE`: Specifies the Amazon S3 bucket location to write the error file. +- `HEADER = TRUE`: Indicates that the first row of the CSV file contains column headers. + +**How to examine the error files** + +If you specify an S3 path with the necessary permissions for an error file and the `COPY FROM` process encounters errors, two different files will be generated in your bucket. The following queries show you how to load these error files into new tables so that you can query and examine the error details and the corresponding rows. 
+ +The following query loads the `error_reasons` csv file, which contains a header with column names: + +``` +COPY error_reasons FROM 's3://my-bucket/error_logs/' +WITH PATTERN='*error_reasons*.csv' HEADER=TRUE; + +SELECT * from error_reasons; +``` + +The following query loads a file containing all rows that encountered errors. Although this file has no header, the table schema should match that of the source file where the errors occurred. + +``` +COPY rejected_rows FROM 's3://my-bucket/error_logs/' +WITH PATTERN='*rejected_rows*.csv' HEADER=FALSE; + +SELECT * FROM rejected_rows; +``` + +Configure error handling parameters such as `MAX_ERRORS_PER_FILE`, `ERROR_FILE`, and `ERROR_FILE_CREDENTIALS` to manage how errors are handled, ensure data integrity, and record errors for future review. For more information about `ERROR_FILE` or `ERROR_FILE_CREDENTIALS`, see the **Parameters** section of [COPY FROM](/sql_reference/commands/data-management/copy-from.html). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_wizard.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_wizard.md new file mode 100644 index 0000000..61090f2 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_loading_data_wizard.md @@ -0,0 +1,185 @@ +# [](#load-data-using-a-wizard)Load data using a wizard + +The **Load data** wizard can help you get started loading data from an Amazon S3 bucket using a simple workflow. You can use the wizard to both create an engine and load your data. + +A general workflow to load data using the **Load data** wizard is shown in the following diagram as the highlighted decision path compared to using SQL shown in the muted path: + +![You can use either the Load data wizard or SQL to create a database, engine, and then load data.](../../assets/images/load_data_wizard_workflow.png) + +The wizard also guides you through setting up an AWS connection. 
To use the wizard, you will need the uniform resource locator (URL) of an Amazon S3 bucket. If credentials are required to access the data that you want to load, you will also need an AWS Key ID and your AWS Secret Key. In most steps in the wizard, you can view the SQL commands associated with your selections in the **Load data** main window by selecting **Show SQL script** in the left navigation pane at the bottom of the window. + +To use the wizard, use the following steps: + +1. Register and/or log in to the [Firebolt Workspace](https://firebolt.go.firebolt.io/signup). +2. Select the (+) icon from the left navigation pane next to **Databases**. +3. Select **Load data** from the drop-down menu, as shown in the following image: + +![To launch the wizard, select the plus icon in the left navigation pane of the Firebolt Workspace.](../../assets/images/load_data_wizard_launch.png) + +## [](#select-an-engine)Select an engine + +![The next step in the wizard is to create an engine.](../../assets/images/load_data_wizard_engine.png) + +Select an engine to load data. If the engine that you want to use already exists, select it from the dropdown list next to **Select engine for ingestion**. Otherwise, select **Create new engine** from the dropdown list, and do the following: + +1. Enter a name in the **New engine name** text box. +2. Select an engine size from the drop-down list next to **Node type**. Consider the following when creating a new engine: + + 1. If you are loading data and using Firebolt for the first time, use the smallest engine size (S) and a small dataset to try out Firebolt’s capabilities. Refer to the [Get Started](/Guides/getting-started/) guide for more information. + 2. If you want to load larger datasets, and a S engine provides insufficient performance, Firebolt recommends **scaling out**, or adding more nodes, first, as shown in the following diagram. 
+ + ![First try adding more nodes, or scaling out if you need to load a large dataset.](../../assets/images/load_data_scale_out.png) + Scaling out can enhance performance for workloads with many similarly sized files, but it also increases billing costs. + + Small and medium engines are available for use right away. If you want to use a large or extra-large engine, reach out to support@firebolt.io. For more information, see [Sizing Engines](/Guides/operate-engines/sizing-engines.html). +3. Select the number of compute nodes to use to load your data next to **Number of nodes**. A node is an individual compute unit within a compute cluster. + + + +- Using more than one node allows Firebolt to load your data and perform operations on your data in parallel on multiple nodes within a single cluster, which can speed up the data loading process. +- A higher number of nodes also means increased costs for compute resources. You can see the total cost per hour for your selection under Advanced settings, given in Firebolt Units (FBU). Each FBU is equivalent to $0.35 US dollars per hour. Find the right balance between cost and speed for your workload. You must use at least one node. + + + +1. Select the number of clusters next to **Number of clusters**. A cluster is a group of nodes that work together. The following apply: + + - If you increase the number of clusters, you will add the number of compute nodes that you selected for each added cluster. + + You can see the total cost per hour for your selection under **Advanced settings**, given in Firebolt Units (FBU). Find the right balance between cost and speed for your workload. You must use at least one cluster. +2. Select the down arrow next to **Advanced settings** for more options for your engine including setting a time to stop the engine after a period of inactivity. 
+ +## [](#set-up-aws-connection)Set up AWS connection + +![The first step in the wizard is to connect to AWS and specify an Amazon S3 bucket.](../../assets/images/load_data_wizard_connect.png) + +### [](#a-using-public-data-that-do-not-require-access-credentials)A. Using public data that do not require access credentials + +- If the data is public and no credentials are needed, simply provide the URL of your Amazon S3 bucket and select **Next Step**. + +### [](#b-using-private-data-credentials-required)B. Using Private Data (Credentials Required) + +If the data requires credentials for access, you must provide them so that Firebolt can retrieve it from AWS on your behalf. You can choose either **Static Credentials** or **Assume Role Authentication**. + +- Use static credentials for simplicity and persistent access when security risks are low, and if your environment requires minimal configuration. +- Use [AssumeRole](https://docs.aws.amazon.com/STS/latest/APIReference/API_AssumeRole.html) Authentication\** for enhanced security, temporary access, and dynamic role management, particularly in environments requiring fine-grained permissions or cross-account access. + +#### [](#1-static-credentials)1. Static Credentials + +1. Provide the **URL** for your Amazon S3 bucket. +2. Enter your **AWS Key ID** and **AWS Secret Key**. +3. For authentication: + + - Select **Access Key ID & Secret Key** as your authentication method. + - The **AWS Key ID** is a 20-character string associated with an AWS user or IAM role (e.g., `AKIAIOSFODNN7EXAMPLE`). + - The **AWS Secret Key** is a 40-character string linked to the AWS Key ID (e.g., `wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY`). + - Optionally, you can also specify an **AWS Session Token**. + - For more information about these credentials, see [Create Access Key and Secret ID in AWS](/Guides/loading-data/creating-access-keys-aws.html). +4. Select **Next Step**. + +#### [](#2-assume-role-authentication)2. 
Assume Role Authentication + +1. Select **IAM Role** as your authentication method. +2. Select **Create an IAM role**. To allow Firebolt to read and write to your Amazon S3 bucket using dynamic credentials, you must do the following: + + - Create an IAM Role. + - Define an **AssumeRole** Policy. +3. After the role is created in your AWS account and the trust policy is attached, copy the **Amazon Resource Name (ARN)** of the role to your clipboard. +4. Paste the ARN into the **Amazon Resource Name** field in Firebolt. +5. Select **Next Step**. + +#### [](#3-using-firebolts-test-dataset-if-youre-not-ready-with-your-own-data)3. Using Firebolt’s Test Dataset (If You’re Not Ready with Your Own Data) + +If you don’t have your own data ready, you can use Firebolt’s sample dataset from the fictional company [Ultra Fast Gaming Inc](https://help.firebolt.io/t/ultra-fast-gaming-firebolt-sample-dataset/250): + +- Use the following Amazon S3 bucket URL: `s3://firebolt-publishing-public/help_center_assets/firebolt_sample_dataset/`. + +Alternatively, you can click the toggle button next to **Use Firebolt Playground Bucket to load sample data**. + +1. Select **Next step**. + +## [](#select-data-to-ingest)Select data to ingest + +![The next step in the wizard is to specify a data source.](../../assets/images/load_data_wizard_source.png) + +1. Select the data file that you want to load. Firebolt’s **Load data** wizard currently supports files in both CSV and Parquet formats. The contents of your S3 bucket are shown automatically along with their object type, size, and when the object was last modified. +2. Enter text or a [prefix](https://docs.aws.amazon.com/AmazonS3/latest/userguide/using-prefixes.html) into the search field above **FILE NAME** to filter the list of objects. You can enter either part of the object’s name or the full prefix that it starts with. +3. Select one file. Firebolt does not support selecting multiple files, or selecting folders. +4. 
If you are using Firebolt’s test data, select box next to `levels.csv`. +5. Select **Next step**. + +## [](#set-up-destination)Set up destination + +![The next step in the wizard is to specify a data target.](../../assets/images/load_data_wizard_target.png) +Specify the table inside a database that you want to load your data into. + +1. You can either select an existing database from the drop-down list next to **Select database** or **Create new database**. + + 1. If you created a new database, enter a new database name and a new table to load your data into. Select **Next step**. + 2. If you selected an existing database, select the table in the database from the drop-down list next to **Select table**, or **Create new table** and provide a new table name. +2. Select **Next step**. + +## [](#format-data)Format data + +![The next step in the wizard is to configure your data format.](../../assets/images/load_data_wizard_format.png) +A default formatting and error handling scheme shows a preview of your data. You can change the default configuration using the following options: + +1. Toggle off **Use default formatting** to show custom formatting options. You can specify options including different file delimiter, quote character, and escape character. + + - Enter a new value in the text box or select an option from the drop-down arrow next to the option that you want to change. + - After each change, the data preview changes to reflect your selection. +2. Toggle off **Use default error handling** to show the following additional error handling options: + + - You can specify a file to write errors to. Enter the name of the file that you want to write including the URL address for an Amazon S3 bucket that contains that file, and your AWS credentials. Firebolt will use these credentials to write an error file on your behalf. 
The output file should be in the following format: + + ``` + s3:///> + ``` + - **Max errors per file** - Specify the percentage of errors you want to allow during data loading. By default, the maximum is set to `0%`, meaning any error will stop the loading process. If you wish to continue loading despite errors, set **Max errors per file** to a non-zero value. For example, entering `10%` or `10` allows the process to continue until errors affect `10%` of the rows. +3. Select **Next step**. + +## [](#map-data)Map data + +![The next step in the wizard is to map your data to your table.](../../assets/images/load_data_wizard_map.png) + +Map the values in your data to columns into the target table. Firebolt automatically detects the schema of your data and displays information including the detected column names, type, and a preview of the data in the next window. By default, each column has a checkbox next to its name. Deselect the box if you don’t want to load the column. You can adjust the schema for the following items: + +1. **Type** - you can change the [data type](/sql_reference/data-types.html) of the column. +2. **Nullable** - toggle this switch to `ON` if the columns in your data can contain `NULL` values. If this value is toggled off for a column, and that column contains `NULL` values, then the wizard will generate an error and stop loading. +3. **Primary index** - toggle this switch to `ON` for the columns you want to include in your primary index. + + - One of Firebolt’s key optimization strategies is to use a primary index that ties to columns that are used frequently in `WHERE`, `JOIN`, `GROUP_BY`, and other clauses used for sorting. Selecting the best primary index, which is a sparse index, can reduce query run times significantly by reducing the data set that the query scans. A primary index also allows Firebolt to manage updates, deletions and insertions to tables and provide optimal query performance. 
+ - It’s best if you choose a primary index based on knowledge about your data and query history. If you don’t know which column(s) to select, you can use Firebolt’s suggested primary indexes by keeping **Automatically assign primary indexes** checked, as shown in the following image: + + ![The next step in the wizard is to map your data to your table.](../../assets/images/load_data_wizard_autopi.png) + + Using Firebolt’s suggested primary index is preferable to having none. In the absence of a query history, Firebolt prioritizes choosing a column for the primary index in the following order: a datetime or timestamp column, a column with low cardinality, or the first column. + - If you include multiple columns as a composite primary index, they will be added in sort order. For example, if you select `column_1` first, then select `column_3`, then `column_3` will be added as a primary index after `column_1`. This means `column_1` will be used first as a sparse index, followed by `column_3`. If you choose more than one primary index, the order of sorting appears next to the toggle switch under the **Primary Index** column. In the previous example, the number `1` appears next to `column_1` and a number `2` appears next to `column_3`. To achieve optimal results, choose indexes in the order of their cardinality, or the number of unique values. Start with the column that has the highest number of unique values as your first primary index, followed by the column with the next highest cardinality. For more information about how to choose a primary index, see [Primary index](/Overview/indexes/primary-index.html). +4. Select **Next step**. + +## [](#review-configuration)Review configuration + +The **Review configuration** window displays your selections in SQL code. If you want to change the configuration, you must go back through the **Load data** wizard workflow to the section that you want to change and amend your selection. 
You cannot edit the SQL code in the **Review configuration** window. + +1. Select **Run ingestion** to load your data. The **Load data** wizard completes and your configuration will run in the **Develop Space** inside the **Firebolt Workspace**. The main window in the **SQL editor** contains the SQL script that configures your load data selections, and may contain several queries. + +## [](#view-results-and-query-statistics)View results and query statistics + +![The next step in the wizard is to specify a data source.](../../assets/images/load_data_wizard_results.png) + +After your load data job completes, you can view the results of each query that was configured by the **Load data** wizard in the Firebolt user interface under **Results** in the bottom window. If you need to edit the queries, you can enter the change into the **SQL Editor** directly and select **Run**. + +1. View information about your query in the **Statistics** tab. This information contains the status of the query, how long it took to run, and the number of rows processed during the data loading job. +2. View metrics in the **Query Profile** tab for each operator used in your query. Select an operation to view metrics. These metrics include the following: + + 1. The output cardinality - the number of rows each operator produced. + 2. The thread time - the sum of the wall clock time that threads spent to run the selected operation across all nodes. + 3. The CPU time - the sum of the time that threads that ran the operator were scheduled on a CPU core. + 4. The output types - the data types of the result of the query. + + You can use metrics in the **Query Profile** tab to analyze and measure the efficiency and performance of your query. For example, if the CPU time is much smaller than thread time, the input-output (IO) latency may be high or the engine that you are using may be running multiple queries at the same time. 
For more information, see [Example with ANALYZE](/sql_reference/commands/queries/explain.html). +3. View monitoring information including the percent CPU, memory, disk use and cache read in the **Engine monitoring** tab. Information is shown from the last 5 minutes by default. Select a different time interval from the drop-down menu next to **Last 5 minutes**. You can also select the **Refresh** icon next to the drop-down menu to update the graphical information. +4. View detailed information associated with each query in the **Query history** tab. This information includes the query status, start time, number of rows and bytes scanned during the load, user and account information. You can do the following: + + 1. Select the **Refresh** icon to update the query history and ID. + 2. Select the filter icon (![filter icon](../../assets/images/filter-icon.png)) to remove or add columns to display. + 3. Select the **More options** icon (![more options icon](../../assets/images/more_options_icon.png)) to export the contents of the Query history tab to a JSON or CSV file. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_external_tables.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_external_tables.md new file mode 100644 index 0000000..36e5d08 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_external_tables.md @@ -0,0 +1,87 @@ +# [](#work-with-external-tables)Work with external tables + +Firebolt supports loading data using *external tables*, which are different from [fact and dimension tables](/Overview/indexes/using-indexes.html#firebolt-managed-tables). External tables store metadata objects that reference files stored in an Amazon S3 bucket, rather than actual data. + +To create an external table, run the [CREATE EXTERNAL TABLE](/sql_reference/commands/data-definition/create-external-table.html) command. 
After you create an external table, use the [INSERT](/sql_reference/commands/data-management/insert.html) command to load the data from the external table into a fact or dimension table. Data that you ingest must be in the same AWS Region as the target Firebolt database. + +Although you can run a query over an external table to return query results, we don’t recommend it. Such a query will be significantly slower than the same query run over the same data in a fact or dimension table because of the data transfer between Firebolt and your data store. We strongly recommend that you use external tables only for ingestion, specifying the table and its columns only in the `FROM` clause of an `INSERT` statement. + +## [](#workflows)Workflows + +For a simple end-to-end workflow that demonstrates loading data into Firebolt, see the [Getting started tutorial](/Guides/getting-started/). + +## [](#supported-file-formats)Supported file formats + +Firebolt supports loading the following source file formats from S3: `PARQUET`, `CSV`, `TSV`, `AVRO`, `JSON` ([JSON Lines](https://jsonlines.org/)), and `ORC`. We are quick to add support for more types, so make sure to let us know if you need it. + +## [](#using-metadata-virtual-columns)Using metadata virtual columns + +Firebolt external tables include metadata virtual columns that Firebolt populates with useful system data during ingestion. Firebolt includes these columns automatically. You don’t need to specify them in the `CREATE EXTERNAL TABLE` statement. + +When you use an external table to ingest data, you can explicitly reference these columns to ingest the metadata. First, you define the columns in a `CREATE FACT|DIMENSION TABLE` statement. Next, you specify the virtual column names to select in the `INSERT INTO` statement, with the fact or dimension table as the target. You can then query the columns in the fact or dimension table for analysis, troubleshooting, and to implement logic. 
For more information, see the example below. + +The metadata virtual columns listed below are available in external tables. + +Metadata column name Description Data type `$source_file_name` The full path of the row data’s source file in Amazon S3, without the bucket. For example, with a source file of `s3://my_bucket/xyz/year=2018/month=01/part-00001.parquet`, the `$source_file_name` is `xyz/year=2018/month=01/part-00001.parquet`. TEXT `$source_file_timestamp` The UTC creation timestamp in second resolution of the row’s source file in Amazon S3. (S3 objects are immutable. In cases where files are overwritten with new data - this will be Last Modified time.) TIMESTAMPTZ `$source_file_size` Size in bytes of the row’s source file in Amazon S3. BIGINT + +### [](#examplequerying-metadata-virtual-column-values)Example–querying metadata virtual column values + +The query example below creates an external table that references an AWS S3 bucket that contains Parquet files from which Firebolt will ingest values for `c_id` and `c_name`. + +``` +CREATE EXTERNAL TABLE my_external_table + ( + c_id INTEGER, + c_name TEXT + ) + CREDENTIALS = ( + AWS_ACCESS_KEY_ID = 'AKIAIOSFODNN7EXAMPLE' AWS_SECRET_ACCESS_KEY = 'wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY' + ) + URL = 's3://my_bucket/' + OBJECT_PATTERN= '*.parquet' + TYPE = (PARQUET); +``` + +The query example below creates a dimension table, which will be the target for the data to be ingested. The statement defines two additional columns, `$source_file_name` and `$source_file_timestamp`, to contain metadata values that Firebolt creates automatically for the external table. + +``` +CREATE DIMENSION TABLE my_dim_table_with_metadata +( + c_id INTEGER, + c_name TEXT, + source_file_name TEXT, + source_file_timestamp TIMESTAMPTZ, +); +``` + +Finally, the `INSERT` query below ingests the data from `my_external_table` into `my_dim_table_with_metadata`. 
The `SELECT` clause explicitly specifies the metadata virtual columns, which is a requirement. + +``` +INSERT INTO + my_dim_table_with_metadata +SELECT + *, + $source_file_name, + $source_file_timestamp +FROM + my_external_table; +``` + +An example `SELECT` query over `my_dim_table_with_metadata` shows that the source data file (minus the `s3://my_bucket` portion of the file path) and file timestamp are included in the dimension table for each row. + +``` +SELECT * FROM my_dim_table_with_metadata; +``` + +``` ++-----------+---------------------+------------------------ +------------------------+ +| c_id | c_name | source_file_name | source_file_timestamp | ++-----------+---------------------+-------------------------+------------------------+ +| 11385 | ClevelandDC8933 | central/cle.parquet | 2021-09-10 10:32:03+00 | +| 12386 | PortlandXfer9483 | west/pdx.parquet | 2021-09-10 10:32:04+00 | +| 12387 | PortlandXfer9449 | west/pdx.parquet | 2021-09-10 10:32:04+00 | +| 12388 | PortlandXfer9462 | west/pdx.parquet | 2021-09-10 10:32:04+00 | +| 12387 | NashvilleXfer9987 | south/bna.parquet | 2021-09-10 10:33:01+00 | +| 12499 | ClevelandXfer8998 | central/cle.parquet | 2021-09-10 10:32:03+00 | +[...] +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_json_data.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_json_data.md new file mode 100644 index 0000000..bb95ebc --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_json_data.md @@ -0,0 +1,193 @@ +# [](#load-semi-structured-json-data)Load semi-structured JSON data + +Semi-structured data does not follow a strict table format but contains structured tags or key-value pairs. JSON is an example of semi-structured data. 
Firebolt supports the following three ways to ingest JSON based on how your data changes and how you query it: + +- [Load JSON into a fixed schema](#load-json-into-a-fixed-schema) if your JSON data has a stable set of fields with shallow nesting. +- [Transform the input during load](#transform-the-input-during-load) if your table must always contain certain fields. +- [Store JSON as text](#store-json-as-text) if you need only specific fields on demand or if the table structure changes frequently. + +This document shows you how to load data using each of the previous methods and the sample JSON dataset in the following section. + +### [](#sample-json-dataset)Sample JSON dataset + +The following JSON data shows two session records for a website, where each line represents a single JSON object. This sample data is used in each of the examples in this document. + +``` +[ + { + "id": 1, + "StartTime": "2020-01-06 17:00:00", + "Duration": 450, + "tags": ["summer-sale", "sports"], + "user_agent": { + "agent": "Mozilla/5.0", + "platform": "Windows NT 6.1", + "resolution": "1024x4069" + } + }, + { + "id": 2, + "StartTime": "2020-01-05 12:00:00", + "Duration": 959, + "tags": ["gadgets", "audio"], + "user_agent": { + "agent": "Safari", + "platform": "iOS 14" + } + } +] +``` + +The following code example creates a staging table that stores the raw JSON data, allowing you to run the subsequent examples: + +``` +-- Create a staging table for raw JSON data with one JSON object per row +DROP TABLE IF EXISTS doc_visits_source; +CREATE TABLE doc_visits_source ( + raw_json TEXT +); + +-- Insert raw JSON data as individual rows +INSERT INTO doc_visits_source (raw_json) +VALUES +('{"id": 1, "StartTime": "2020-01-06 17:00:00", "Duration": 450, "tags": ["summer-sale", "sports"], "user_agent": {"agent": "Mozilla/5.0", "platform": "Windows NT 6.1", "resolution": "1024x4069"}}'), +('{"id": 2, "StartTime": "2020-01-05 12:00:00", "Duration": 959, "tags": ["gadgets", "audio"], "user_agent": 
{"agent": "Safari", "platform": "iOS 14"}}'); +``` + +If you want to load JSON data from an Amazon S3 bucket, you can create an external table that references the file as follows: + +``` +CREATE EXTERNAL TABLE visits_external ( + raw_json TEXT +) +LOCATION = 's3://your-bucket-name/path/to/json-file/' +FILE_FORMAT = (TYPE = 'JSON'); +``` + +## [](#load-json-into-a-fixed-schema)Load JSON into a fixed schema + +If your JSON data has a stable set of fields with shallow nesting, you can load it into a table with a fixed schema to simplify queries. Missing keys are assigned default values. This method allows you to query columns directly without additional parsing, making queries faster and easier to write. Extra keys that are not explicitly mapped are excluded from structured tables, making this approach less flexible for changing data. If stored separately in a `TEXT` column, they remain accessible for later extraction. + +The following code example uses the previously created `doc_visits_source` table to define columns that map directly to known keys: + +``` +-- Create the target table 'visits_fixed' with a fixed schema +DROP TABLE IF EXISTS visits_fixed; +CREATE FACT TABLE visits_fixed ( + id INT DEFAULT 0, + start_time TIMESTAMP DEFAULT '1970-01-01 00:00:00', + duration INT DEFAULT 0, + tags ARRAY(TEXT) DEFAULT [] +) +PRIMARY INDEX start_time; + +-- Insert data into 'visits_fixed' by extracting values from the raw JSON +INSERT INTO visits_fixed +SELECT + JSON_POINTER_EXTRACT(raw_json, '/id')::INT AS id, + TO_TIMESTAMP(TRIM(BOTH '"' FROM JSON_POINTER_EXTRACT(raw_json, '/StartTime')), 'YYYY-MM-DD HH24:MI:SS') AS start_time, + JSON_POINTER_EXTRACT(raw_json, '/Duration')::INT AS duration, + JSON_POINTER_EXTRACT(raw_json, '/tags')::ARRAY(TEXT) AS tags +FROM doc_visits_source; +``` + +The following table shows the expected results: + +id start\_time duration tags 1 1/6/2020 17:00 450 \[“summer-sale”, “sports”] 2 1/5/2020 12:00 959 \[“gadgets”, “audio”] + +Important 
characteristics of the table: + +- The mandatory scalar fields, `id`, `start_time`, and `duration`, are stored in separate columns, which makes it easier to filter, sort, or join by these fields. +- Each column maps directly to a known JSON key, allowing for simpler queries without the need for JSON functions. +- Default values ensure that the table loads even if some fields are missing or additional keys appear. Extra JSON fields such as `user_agent` with `agent`, `platform`, and `resolution` are ignored and not stored in the table. +- Array columns are used to store `tags`, which supports arbitrary numbers of values without schema changes. + +## [](#transform-the-input-during-load)Transform the input during load + +Parsing JSON data during ingestion eliminates the need for subsequent query-time parsing, simplifying and accelerating queries. However, transforming data during load also requires well-defined JSON paths that remain consistent. If the JSON paths change, the load might fail. + +The following code example uses the previously created `doc_visits_source` table to parse JSON data as it loads and inserts extracted fields into a Firebolt table named `visits_transformed`. 
It shows how to use [JSON\_POINTER\_EXTRACT\_KEYS](/sql_reference/functions-reference/JSON/json-pointer-extract-keys.html) and [JSON\_POINTER\_EXTRACT\_VALUES](/sql_reference/functions-reference/JSON/json-pointer-extract-values.html) to store a dynamic key-value pair – `agent_props_keys` and `agent_props_vals` – from a nested object: + +``` +DROP TABLE IF EXISTS visits_transformed; +CREATE FACT TABLE visits_transformed ( + id INT, + start_time TIMESTAMP, + duration INT, + tags ARRAY(TEXT), + agent_props_keys ARRAY(TEXT), + agent_props_vals ARRAY(TEXT) +) +PRIMARY INDEX start_time; + +INSERT INTO visits_transformed +SELECT + JSON_POINTER_EXTRACT(raw_json, '/id')::INT, + TO_TIMESTAMP(TRIM(BOTH '"' FROM JSON_POINTER_EXTRACT(raw_json, '/StartTime')), 'YYYY-MM-DD HH24:MI:SS'), + JSON_POINTER_EXTRACT(raw_json, '/Duration')::INT, + JSON_POINTER_EXTRACT(raw_json, '/tags')::ARRAY(TEXT), + JSON_POINTER_EXTRACT_KEYS(raw_json, '/user_agent')::ARRAY(TEXT), + JSON_POINTER_EXTRACT_VALUES(raw_json, '/user_agent')::ARRAY(TEXT) +FROM doc_visits_source; +``` + +The following table shows the expected results: + +id start\_time duration tags agent\_props\_keys agent\_props\_vals 1 1/6/2020 17:00 450 \[“summer-sale”,”sports”] \[“agent”, “platform”, “resolution”] \[“Mozilla/5.0”, “Windows NT 6.1”, “1024x4069”] 2 1/5/2020 12:00 959 \[“gadgets”,”audio”] \[“agent”, “platform”] \[“Safari”, “iOS 14”] + +Important characteristics of the previous table: + +- The `user_agent` object is stored in two arrays: `agent_props_keys` and `agent_props_vals`. The [`JSON_POINTER_EXTRACT_KEYS`](/sql_reference/functions-reference/JSON/json-pointer-extract-keys.html) function extracts the keys from the `user_agent` object into the `agent_props_keys` array. The [`JSON_POINTER_EXTRACT_VALUES`](/sql_reference/functions-reference/JSON/json-pointer-extract-values.html) function extracts the corresponding values into the `agent_props_vals` array. 
Storing keys and values in parallel arrays offers flexibility when the `user_agent` map changes and avoids schema updates for new or removed fields. + +A common error may occur if a field path does not exist in the JSON document. Firebolt returns an error because `NULL` values cannot be cast to `INT`. For example, the following query attempts to extract a non-existent field `/unknown_field` and cast it to `INT`, which results in an error: + +``` +SELECT JSON_POINTER_EXTRACT(raw_json, '/unknown_field')::INT +FROM doc_visits_source; +``` + +To avoid this error, use a default value or conditional expression as shown in the following code example: + +``` +INSERT INTO visits_transformed +SELECT + CASE + WHEN JSON_POINTER_EXTRACT(raw_json, '/unknown_field') IS NOT NULL + THEN JSON_POINTER_EXTRACT(raw_json, '/unknown_field')::INT + ELSE NULL + END AS id +FROM doc_visits_source; +``` + +The following table shows the expected results: + +id start\_time duration tags 0 NULL NULL NULL 0 NULL NULL NULL 1 1/6/2020 17:00 450 \[“summer-sale”, “sports”] 2 1/5/2020 12:00 959 \[“gadgets”, “audio”] + +## [](#store-json-as-text)Store JSON as text + +You can store JSON as a single text column if the data structure changes frequently or if you only need certain fields in some queries. This approach simplifies ingestion since no parsing occurs during loading, but it requires parsing fields at query time, which can make queries more complex if you need to extract many fields regularly. 
+ +The following code example uses the previously created intermediary `doc_visits_source` table to create a permanent table that stores raw JSON, allowing you to parse only what you need on demand: + +``` +DROP TABLE IF EXISTS visits_raw; +CREATE FACT TABLE visits_raw ( + raw_json TEXT +); + +-- Insert data into the 'visits_raw' table from the staging table +INSERT INTO visits_raw +SELECT raw_json +FROM doc_visits_source; +``` + +The following table shows the expected results: + +raw\_json {“id”: 1, “StartTime”: “2020-01-06 17:00:00”, “Duration”: 450, “tags”: \[“summer-sale”, “sports”], “user\_agent”: {“agent”: “Mozilla/5.0”, “platform”: “Windows NT 6.1”, “resolution”: “1024x4069”}} {“id”: 2, “StartTime”: “2020-01-05 12:00:00”, “Duration”: 959, “tags”: \[“gadgets”, “audio”], “user\_agent”: {“agent”: “Safari”, “platform”: “iOS 14”}} + +Important characteristics of the table: + +- The `id`, `start_time`, `duration`, and `tags` columns follow the same purpose as in the [previous table example](#transform-the-input-during-load). +- Each row in the previous table contains a complete JSON object stored in a single `TEXT` column, rather than being parsed into separate fields. This approach is beneficial when the required fields are unknown at ingestion or the JSON structure changes frequently, allowing for flexible data storage without modifying the schema. Fields can be extracted dynamically at query time using Firebolt’s JSON functions, though frequent parsing may increase query complexity and cost. +- Parsing occurs at query time, which can save upfront processing when data is loaded, but it might increase query complexity and cost if you need to parse many fields frequently. +- Subsequent queries need to extract fields manually with JSON functions as needed. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_parquet_data.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_parquet_data.md new file mode 100644 index 0000000..9594460 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_load_parquet_data.md @@ -0,0 +1,179 @@ +# [](#load-semi-structured-parquet-data)Load semi-structured Parquet data + +Apache Parquet is a binary file format that supports both structured columns and semi-structured data, including arrays, structs, and maps. If these nested structures do not align to a strictly relational schema, they are described as semi-structured. Firebolt’s external tables support extracting these semi-structured fields from Parquet files, treating them similarly to other semi-structured data such as JSON. This document shows how to load and query Parquet data that is stored as structs in arrays or as maps of key-value pairs. 
+ +- [Defining external table columns for Parquet arrays and maps](#defining-external-table-columns-for-parquet-arrays-and-maps) +- [Syntax for defining a Parquet nested structure](#syntax-for-defining-a-parquet-nested-structure) +- [Example–ingest and work with structs inside Parquet arrays](#exampleingest-and-work-with-structs-inside-parquet-arrays) + + - [Step 1–create an external table](#step-1create-an-external-table) + - [Step 2–create a fact or dimension table](#step-2create-a-fact-or-dimension-table) + - [Step 3–insert into the fact table from the external table](#step-3insert-into-the-fact-table-from-the-external-table) + - [Step 4–query array values](#step-4query-array-values) +- [Example–ingest and work with maps](#exampleingest-and-work-with-maps) + + - [Step 1–create an external table](#step-1create-an-external-table-1) + - [Step 2–create a fact or dimension table](#step-2create-a-fact-or-dimension-table-1) + - [Step 3–insert into the fact table from the external table](#step-3insert-into-the-fact-table-from-the-external-table-1) + - [Step 4–query map values](#step-4query-map-values) + +## [](#defining-external-table-columns-for-parquet-arrays-and-maps)Defining external table columns for Parquet arrays and maps + +When you set up an external table to ingest Parquet data files, you use a hierarchical dotted notation syntax to define table columns. Firebolt uses this notation to identify the field to ingest. + +## [](#syntax-for-defining-a-parquet-nested-structure)Syntax for defining a Parquet nested structure + +You specify the top grouping element of a nested structure in Parquet followed by the field in that structure that contains the data to ingest. You then declare the column type using the `ARRAY()` notation, where `` is the [Firebolt data type](/sql_reference/data-types.html) corresponding to the data type of the field in Parquet. + +``` +"." ARRAY() +``` + +Examples of this syntax in `CREATE EXTERNAL TABLE` queries are demonstrated below. 
+ +## [](#exampleingest-and-work-with-structs-inside-parquet-arrays)Example–ingest and work with structs inside Parquet arrays + +Consider the Parquet schema example below. The following elements define an array of structs: + +- A single, optional group field, `hashtags`, contains any number of another group, `bag`. This is the top grouping element. +- The `bag` groups each contain a single, optional group, `array_element`. +- The`array_element` group contains a single, optional field, `s`. +- The field `some_value` contains a value that is a `TEXT` type (in binary primitive format). + +``` +optional group hashtags (LIST) { + repeated group bag { + optional group array_element { + optional binary some_value (TEXT); + } + } +} +``` + +The steps below demonstrate the process to ingest the array values into Firebolt. You create an external table, create a fact table, and insert data into the fact table from the external table, which is connected to the Parquet data store. + +### [](#step-1create-an-external-table)Step 1–create an external table + +The `CREATE EXTERNAL TABLE` example below creates a column in an external table from the Parquet schema shown in the example above. The column definition uses the top level grouping `hashtags` followed by the field `some_value`. Intermediate nesting levels are omitted. + +``` +CREATE EXTERNAL TABLE IF NOT EXISTS my_parquet_array_ext_tbl +( + [...,] --additional columns possible, not shown + "hashtags.some_value" ARRAY(TEXT) + [,...] +) +CREDENTIALS = (AWS_KEY_ID = '****' AWS_SECRET_KEY = '*****') +URL = 's3://my_bucket_of_parquet_goodies/' +OBJECT_PATTERN = '*.parquet' +TYPE = (PARQUET); +``` + +### [](#step-2create-a-fact-or-dimension-table)Step 2–create a fact or dimension table + +Create a fact or dimension table that defines a column of the same `ARRAY(TEXT)` type that you defined in the external table in step 1. The example below demonstrates this for a fact table. 
+ +``` +CREATE FACT TABLE IF NOT EXISTS my_parquet_array_fact_tbl +( + [...,] --additional columns possible, not shown + some_value ARRAY(TEXT) + [,...] +) +[...] +--required primary index for fact table not shown +--optional partitions not shown +; +``` + +### [](#step-3insert-into-the-fact-table-from-the-external-table)Step 3–insert into the fact table from the external table + +The example below demonstrates an `INSERT` statement that selects the array values from Parquet data files using the external table column definition in step 1, and then inserts them into the specified fact table column, `some_value`. + +``` +INSERT INTO my_parquet_array_fact_tbl + SELECT "hashtags.some_value" AS some_value + FROM my_parquet_array_ext_tbl; +``` + +### [](#step-4query-array-values)Step 4–query array values + +After you ingest array values into the fact table, you can query and manipulate the array using array functions and Lambda functions. For more information, see [Working with arrays](/Guides/loading-data/working-with-semi-structured-data/working-with-arrays.html). + +Use multipart Parquet column names to extract data from nested structures. For simple `ARRAY(TEXT)`, use a single top-level field name. + +## [](#exampleingest-and-work-with-maps)Example–ingest and work with maps + +External tables connected to AWS Glue currently do not support reading maps from Parquet files. + +Parquet stores maps as arrays of key-value pairs, where each key\_value group contains a key and its corresponding value. Consider the Parquet schema example below. The following define the key-value elements of the map: + +- A single, optional group, `context`, is a group of mappings that contains any number of the group `key_value`. +- The `key_value` groups each contain a required field, `key`, which contains the key name as a `TEXT`. Each group also contains an optional field `value`, which contains the value as a `TEXT` corresponding to the key name in the same `key_value` group. 
+ +``` +optional group context (MAP) { + repeated group key_value { + required binary key (TEXT); + optional binary value (TEXT); + } + } +``` + +The steps below demonstrate the process of creating an external table, creating a fact table, and inserting data into the fact table from the Parquet file using the external table. + +### [](#step-1create-an-external-table-1)Step 1–create an external table + +When you create an external table for a Parquet map, you use the same syntax that you use in the example for arrays above. You create one column for keys and another column for values. The `CREATE EXTERNAL TABLE` example below demonstrates this. + +``` +CREATE EXTERNAL TABLE IF NOT EXISTS my_parquet_map_ext_tbl +( + "context.keys" ARRAY(TEXT), + "context.values" ARRAY(TEXT) +) +CREDENTIALS = (AWS_KEY_ID = '****' AWS_SECRET_KEY = '*****') +URL = 's3://my_bucket_of_parquet/' +OBJECT_PATTERN = '*.parquet' +TYPE = (PARQUET); +``` + +### [](#step-2create-a-fact-or-dimension-table-1)Step 2–create a fact or dimension table + +Create a Firebolt fact or dimension table that defines columns of the same `ARRAY(TEXT)` types that you defined in the external table in step 1. The example below demonstrates this for a fact table. + +``` +CREATE FACT TABLE IF NOT EXISTS my_parquet_map_fact_tbl +( + [...,] --additional columns possible, not shown + my_parquet_array_keys ARRAY(TEXT), + my_parquet_array_values ARRAY(TEXT) + [,...] +) +[...] --required primary index for fact table not shown + --optional partitions not shown +``` + +### [](#step-3insert-into-the-fact-table-from-the-external-table-1)Step 3–insert into the fact table from the external table + +The example below demonstrates an `INSERT INTO` statement that selects the array values from Parquet data files using the external table column definition in step 1, and inserts them into the specified fact table columns, `my_parquet_array_keys` and `my_parquet_array_values`. 
+ +``` +INSERT INTO my_parquet_map_fact_tbl + SELECT "context.keys" AS my_parquet_array_keys, + "context.values" AS my_parquet_array_values + FROM my_parquet_map_ext_tbl; +``` + +### [](#step-4query-map-values)Step 4–query map values + +After you ingest array values into the fact table, you can query and manipulate the array using array functions and Lambda functions. For more information, see [Working with arrays](/Guides/loading-data/working-with-semi-structured-data/working-with-arrays.html). + +A query that uses a Lambda function to return a specific value by specifying the corresponding key value is shown below. For more information, see [Manipulating arrays using Lambda functions](/Guides/loading-data/working-with-semi-structured-data/working-with-arrays.html#manipulating-arrays-with-lambda-functions). + +``` +SELECT + ARRAY_FIRST(v, k -> k = 'key_name_of_interest', my_parquet_array_keys, my_parquet_array_values) AS returned_corresponding_key_value +FROM + my_parquet_map_ext_tbl; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_arrays.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_arrays.md new file mode 100644 index 0000000..50554b1 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_arrays.md @@ -0,0 +1,387 @@ +# [](#work-with-arrays)Work with arrays + +This section covers querying and manipulating arrays in Firebolt. 
+ +- [Declaring ARRAY data types in table definitions](#declaring-array-data-types-in-table-definitions) +- [Simple array functions](#simple-array-functions) +- [Manipulating arrays with Lambda functions](#manipulating-arrays-with-lambda-functions) + + - [Lambda function general syntax](#lambda-function-general-syntax) + - [Lambda function example–single array](#lambda-function-examplesingle-array) + - [Lambda function example–multiple arrays](#lambda-function-examplemultiple-arrays) +- [UNNEST](#unnest) + + - [Example–single UNNEST with single ARRAY-typed column](#examplesingle-unnest-with-single-array-typed-column) + - [Example–single UNNEST using multiple ARRAY-typed columns](#examplesingle-unnest-using-multiple-array-typed-columns) + - [Example–multiple UNNEST clauses resulting in a Cartesian product](#examplemultiple-unnest-clauses-resulting-in-a-cartesian-product) + - [Example–error on UNNEST of multiple arrays with different lengths](#exampleerror-on-unnest-of-multiple-arrays-with-different-lengths) +- [ARRAY input and output syntax](#array-input-and-output-syntax) + + - [Converting ARRAY to TEXT](#converting-array-to-text) + - [Converting TEXT to ARRAY](#converting-text-to-array) + - [Nested ARRAYs](#nested-arrays) + +## [](#declaring-array-data-types-in-table-definitions)Declaring ARRAY data types in table definitions + +Array types are declared using `ARRAY()` where `` can be any data type that Firebolt supports. This includes the `ARRAY` data type, so arrays can be arbitrarily nested. + +If you load an array from a CSV file, the arrays in the CSV file must be enclosed in double quotes (`""`). + +For example, if a CSV file contains a row containing `value1 , value2 , "[array_value3 , array_value4]"` , you can create a table using the following code to read `array_value3` and `array_value4` into `array_values`. 
+ +``` +CREATE TABLE IF NOT EXISTS array_example + ( + value1 STRING, + value2 STRING, + array_values ARRAY(TEXT) + ) + URL = 's3://path_to_your_data/' + TYPE = (csv); +``` + +Array literals are also supported. For example, the `SELECT` statement shown below is valid. + +``` +SELECT [1,2,3,4] +``` + +### [](#basis-for-examples)Basis for examples + +All examples in this topic are based on the table below, named `visits`. The column `id` is of type `INTEGER`. All other columns are of type `ARRAY(TEXT)`. + +![](/assets/images/array_example_table.png) + +## [](#simple-array-functions)Simple array functions + +There are several fundamental functions that you can use to work with arrays, including [ARRAY\_LENGTH](/sql_reference/functions-reference/array/array-length.html), [ARRAY\_CONCAT](/sql_reference/functions-reference/array/array-concat.html), and [ARRAY\_FLATTEN](/sql_reference/functions-reference/array/flatten.html). See the respective reference for a full description. Brief examples are shown below. + +### [](#length-example)LENGTH example + +`LENGTH` returns the number of elements in an array. + +``` +SELECT + id, + LENGTH(agent_props_keys) as key_array_length +FROM visits; +``` + +**Returns**: + +``` ++-----------------------+ +| id | key_array_length | ++-----------------------+ +| 1 | 3 | +| 2 | 2 | +| 3 | 3 | ++-----------------------+ +``` + +### [](#array_concat-example)ARRAY\_CONCAT example + +`ARRAY_CONCAT` combines multiple arrays into a single array. 
+ +``` +SELECT +  id, +  ARRAY_CONCAT(agent_props_keys, agent_props_vals) as concat_keys_and_vals +FROM visits; +``` + +**Returns**: + +``` ++----+------------------------------------------------------------------------------+ +| id | concat_keys_and_vals | ++----+------------------------------------------------------------------------------+ +| 1 | ["agent","platform","resolution","Mozilla/5.0","Windows NT 6.1","1024X4069"] | +| 2 | ["agent","platform","Safari","iOS 14"] | +| 3 | ["agent","platform","platform","Safari","iOS 14","Windows 11"] | ++----+------------------------------------------------------------------------------+ +``` + +### [](#array_flatten-example)ARRAY\_FLATTEN example + +`ARRAY_FLATTEN` converts an ARRAY of ARRAYs into a single flattened ARRAY. Note that this operation flattens only one level of the array hierarchy. + +``` +SELECT ARRAY_FLATTEN([ [[1,2,3],[4,5]], [[2]] ]) as flattened_array; +``` + +**Returns**: + +``` ++---------------------+ +| flattened_array | ++---------------------+ +| [[1,2,3],[4,5],[2]] | ++---------------------+ +``` + +## [](#manipulating-arrays-with-lambda-functions)Manipulating arrays with Lambda functions + +Firebolt *Lambda functions* are a powerful tool that you can use on arrays to extract results. Lambda functions iteratively perform an operation on each element of one or more arrays. Arrays and the operation to perform are specified as arguments to the Lambda function. + +### [](#lambda-function-general-syntax)Lambda function general syntax + +The general syntax pattern of a Lambda function is shown below. For detailed syntax and examples see the reference topics for [Lambda functions](/sql_reference/functions-reference/Lambda/). + +``` +<array_function>(<var_1>[, <var_2>][, ...<var_n>] -> <expr>, <array_1>[, <array_2>][, ...<array_n>]) +``` + +| Parameter | Description | +| --- | --- | +| `<array_function>` | Any array function that accepts a Lambda expression as an argument. For a list, see [Lambda functions](/sql_reference/functions-reference/Lambda/). | +| `<var_1>[, <var_2>][, ...<var_n>]` | A list of one or more variables that you specify. The list is specified in the same order and must be the same length as the list of array expressions (`<array_1>[, <array_2>][, ...<array_n>]`). At runtime, each variable contains an element of the corresponding array. The specified `<expr>` is performed for each variable. | +| `<expr>` | The operation that is performed for each element of the array. This is typically a function or Boolean expression. | +| `<array_1>[, <array_2>][, ...<array_n>]` | A comma-separated list of expressions, each of which evaluates to an `ARRAY` data type. |
+ +### [](#lambda-function-examplesingle-array)Lambda function example–single array + +Consider the following [TRANSFORM](/sql_reference/functions-reference/Lambda/transform.html) array function that uses a single array variable and reference in the Lambda expression. This example applies the `UPPER` function to each element `t` in the `ARRAY`-typed column `tags`. This converts each element in each `tags` array to upper-case. + +``` +SELECT +  id, +  TRANSFORM(t -> UPPER(t), tags) AS up_tags +FROM visits; +``` + +**Returns:** + +``` ++----+--------------------------+ +| id | up_tags | ++----+--------------------------+ +| 1 | ["SUMMER-SALE","SPORTS"] | +| 2 | ["GADGETS","AUDIO"] | +| 3 | ["SUMMER-SALE","AUDIO"] | ++----+--------------------------+ +``` + +### [](#lambda-function-examplemultiple-arrays)Lambda function example–multiple arrays + +[ARRAY\_FIRST](/sql_reference/functions-reference/Lambda/array-first.html) is an example of a function that takes multiple arrays as arguments in a map of key-value pairs. One array represents the keys and the other represents the values. + +`ARRAY_FIRST` uses a Boolean expression that you specify to find the key in the key array. If the Boolean expression resolves to true, the function returns the first value in the value array that corresponds to the key’s element position. If there are duplicate keys, only the first corresponding value is returned. 
+ +The example below returns the first value in the `agent_props_vals` array where the corresponding position in the `agent_props_keys` array contains the key `platform`. + +``` +SELECT + id, + ARRAY_FIRST(v, k -> k = 'platform', agent_props_vals, agent_props_keys) AS platform +FROM visits; +``` + +**Returns**: + +``` ++----+----------------+ +| id | platform | ++----+----------------+ +| 1 | Windows NT 6.1 | +| 2 | iOS 14 | +| 3 | iOS 14 | ++----+----------------+ +``` + +[ARRAY\_SORT](/sql_reference/functions-reference/array/array-sort.html) sorts one array by another. One array represents the values and the other represents the sort order. + +The example below sorts the first array by the positions defined in the second array + +``` +SELECT + ARRAY_SORT(x,y -> y, [ 'A','B','C'],[3,2,1]) AS res; +``` + +**Returns**: + +``` ++-----------------+ +| res | ++-----------------+ +| ["C", "B", "A"] | ++-----------------+ +``` + +## [](#unnest)UNNEST + +You might want to transform a nested array structure to a standard tabular format. `UNNEST` serves this purpose. + +[UNNEST](/sql_reference/commands/queries/select.html#unnest) is a table-valued function (TVF) that transforms an input row containing an array into a set of rows. `UNNEST` unfolds the elements of the array and duplicates all other columns found in the `SELECT` clause for each array element. If the input array is empty, the corresponding row is eliminated. + +You can use a single `UNNEST` command to unnest several arrays if the arrays are the same length. + +Multiple `UNNEST` statements in a single `FROM` clause result in a Cartesian product. Each element in the first array has a record in the result set corresponding to each element in the second array. + +### [](#examplesingle-unnest-with-single-array-typed-column)Example–single UNNEST with single ARRAY-typed column + +The following example unnests the `tags` column from the `visits` table. 
+ +``` +SELECT + id, + tag +FROM + visits, + UNNEST(tags) as r(tag); +``` + +**Returns**: + +``` ++----+---------------+ +| id | tag | ++----+---------------+ +| 1 | "summer-sale" | +| 1 | "sports" | +| 2 | "gadgets" | +| 2 | "audio" | ++----+---------------+ +``` + +### [](#examplesingle-unnest-using-multiple-array-typed-columns)Example–single UNNEST using multiple ARRAY-typed columns + +The following query specifies both the `agent_props_keys` and `agent_props_vals` columns to unnest. + +``` +SELECT + id, + a_key, + a_val +FROM + visits, + UNNEST(agent_props_keys, agent_props_vals) as r(a_key, a_val); +``` + +**Returns**: + +``` ++----+------------+------------------+ +| id | a_key | a_val | ++----+------------+------------------+ +| 1 | agent | “Mozilla/5.0” | +| 1 | platform | “Windows NT 6.1” | +| 1 | resolution | “1024x4069” | +| 2 | agent | “Safari” | +| 2 | platform | “iOS 14” | ++----+------------+------------------+ +``` + +### [](#examplemultiple-unnest-clauses-resulting-in-a-cartesian-product)Example–multiple UNNEST clauses resulting in a Cartesian product + +The following query, while valid, creates a Cartesian product. 
+ +``` +SELECT + id, + a_key, + a_val +FROM + visits, + UNNEST(agent_props_keys as a_keys) as r1(a_key), + UNNEST(agent_props_vals as a_vals) as r2(a_val); +``` + +**Returns**: + +``` ++-----+------------+------------------+ +| id | a_key | a_val | ++-----+------------+------------------+ +| 1 | agent | "Mozilla/5.0" | +| 1 | agent | "Windows NT 6.1" | +| 1 | agent | "1024x4069" | +| 1 | platform | "Mozilla/5.0" | +| 1 | platform | "Windows NT 6.1" | +| 1 | platform | "1024x4069" | +| 1 | resolution | "Mozilla/5.0" | +| 1 | resolution | "Windows NT 6.1" | +| 1 | resolution | "1024x4069" | +| 2 | agent | "Safari" | +| 2 | agent | "iOS 14" | +| 2 | platform | "Safari" | +| 2 | platform | "iOS 14" | ++-----+------------+------------------+ +``` + +### [](#exampleerror-on-unnest-of-multiple-arrays-with-different-lengths)Example–error on UNNEST of multiple arrays with different lengths + +The following query is **invalid** and will result in an error as the `tags` and `agent_props_keys` arrays have different lengths for row 1. + +``` +SELECT + id, + tag, + a_key +FROM + visits, + UNNEST(tags, agent_props_keys) as r(tag, a_key); +``` + +## [](#array-input-and-output-syntax)ARRAY input and output syntax + +`ARRAY` values can be converted from and to `TEXT`. This happens, for example, when an explicit `CAST` is added to a query, or when `ARRAY` values are (de-)serialized in a `COPY` statement. + +### [](#converting-array-to-text)Converting ARRAY to TEXT + +Broadly, the `TEXT` representation of an `ARRAY` value starts with an opening curly brace (`{`). This is followed by the `TEXT` representations of the individual array elements separated by commas (`,`). It ends with a closing curly brace (`}`). `NULL` array elements are represented by the literal string `NULL`. For example, the query + +``` +SELECT + CAST([1,2,3,4,NULL] AS TEXT) +``` + +returns the `TEXT` value `'{1,2,3,4,NULL}'`. 
+ +When converting `ARRAY` values containing `TEXT` elements to `TEXT`, some additional rules apply. Specifically, array elements are enclosed by double quotes (`"`) in the following cases: + +- The array element is an empty string. +- The array element contains curly or square braces (`{`,`[`,`]`,`}`), commas (`,`), double quotes (`"`), backslashes (`\`), or white space. +- The array element matches the word `NULL` (case-insensitively). + +Additionally, double quotes and backslashes embedded in array elements will be backslash-escaped. For example, the query + +``` +SELECT + CAST(['1','2','3','4',NULL,'','{impostor,array}','["impostor","array","back\slash"]',' padded and spaced ', 'only spaced', 'null'] AS TEXT) +``` + +returns the `TEXT` value `'{1,2,3,4,NULL,"","{impostor,array}","[\"impostor\",\"array\",\"back\\slash\"]"," padded and spaced ","only spaced","null"}'`. + +### [](#converting-text-to-array)Converting TEXT to ARRAY + +When converting the `TEXT` representation of an array back to `ARRAY`, the same quoting and escaping rules as above apply. Unquoted whitespace surrounding array elements is trimmed, but whitespace contained within array elements is preserved. The array elements themselves are converted according to the conversion rules for the requested array element type. For example, the query + +``` +SELECT + CAST('{1, 2, 3, 4, null, "", "{impostor,array}", "[\"impostor\",\"array\",\"back\\slash\"]", " padded and spaced ", "null", unquoted padded and spaced }' AS ARRAY(TEXT)) +``` + +returns the `ARRAY(TEXT)` value `[1,2,3,4,NULL,'','{impostor,array}','["impostor","array","back\slash"]',' padded and spaced ','null','unquoted padded and spaced']`. + +It is also possible to enclose arrays with square braces (`[` and `]`) instead of curly braces (`{` and `}`) when converting from `TEXT` to `ARRAY`. For example, the query + +``` +SELECT + CAST('[1, 2, 3, 4, NULL]' AS ARRAY(INTEGER)) +``` + +returns the `ARRAY(INTEGER)` value `[1,2,3,4,NULL]`. 
+ +### [](#nested-arrays)Nested ARRAYs + +Finally, the same procedure applies when converting nested `ARRAY` values from and to `TEXT`. For example, the query + +``` +SELECT +  CAST([NULL,[],[NULL],[1,2],[3,4]] AS TEXT) +``` + +returns the `TEXT` value `{NULL,{},{NULL},{1,2},{3,4}}`, and the query + +``` +SELECT +  CAST('{NULL,{},{NULL},{1,2},{3,4}}' AS ARRAY(ARRAY(INTEGER))) +``` + +returns the `ARRAY(ARRAY(INTEGER))` value `[NULL,[],[NULL],[1,2],[3,4]]`. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_semi_structured_data.md b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_semi_structured_data.md new file mode 100644 index 0000000..f553233 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_loading_data_working_with_semi_structured_data_working_with_semi_structured_data.md @@ -0,0 +1,21 @@ +# [](#work-with-semi-structured-data)Work with semi-structured data + +Semi-structured data is any data that does not follow a strict tabular schema and often includes fields that are not standard SQL data types. This data typically has a nested structure and supports complex types such as arrays, maps, and structs. + +Common formats of semi-structured data include: + +- **JSON**— A widely used format for semi-structured data. For information on loading JSON data with Firebolt, see [Load semi-structured JSON data](/Guides/loading-data/working-with-semi-structured-data/load-json-data.html). +- **Parquet and ORC**— Serialization formats that support nested structures and complex data types. For information on loading Parquet data with Firebolt, see [Load semi-structured Parquet data](/Guides/loading-data/working-with-semi-structured-data/load-parquet-data.html). + +## [](#firebolts-approach-to-semi-structured-data)Firebolt’s approach to semi-structured data + +Firebolt transforms semi-structured data using arrays, enabling efficient querying. 
Arrays in Firebolt represent the following data constructs: + +- **Variable-length arrays**— Arrays with unpredictable lengths in the source data are supported by Firebolt. These arrays can have arbitrary nesting levels, provided the nesting level is consistent within a column and known during table creation. +- **Maps**— Maps, also known as dictionaries, are represented using two coordinated arrays—one for keys and one for values. This approach is particularly useful for JSON-like data where objects have varying keys. + +* * * + +- [Load semi-structured JSON data](/Guides/loading-data/working-with-semi-structured-data/load-json-data.html) +- [Load semi-structured Parquet data](/Guides/loading-data/working-with-semi-structured-data/load-parquet-data.html) +- [Work with arrays](/Guides/loading-data/working-with-semi-structured-data/working-with-arrays.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization.md b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization.md new file mode 100644 index 0000000..8d69785 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization.md @@ -0,0 +1,20 @@ +# [](#register-and-set-up-your-organization)Register and set up your organization + +Learn how to register your organization, create accounts, and create logins and/or service accounts. + +- [Register your organization](/Guides/managing-your-organization/creating-an-organization.html) +- [Create an account](/Guides/managing-your-organization/managing-accounts.html) +- [Manage logins](/Guides/managing-your-organization/managing-logins.html) +- [Create service accounts](/Guides/managing-your-organization/service-accounts.html) + +# [](#manage-user-permissions)Manage user permissions + +Learn how to create users, and link logins and/or service accounts. 
+ +- [Manage users](/Guides/managing-your-organization/managing-users.html) + +# [](#manage-billing)Manage billing + +Learn how to manage billing with observability at both organization and account levels. + +- [Manage billing](/Guides/managing-your-organization/billing.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_billing.md b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_billing.md new file mode 100644 index 0000000..3968304 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_billing.md @@ -0,0 +1,33 @@ +# [](#billing)Billing + +Firebolt bills are based on the consumption of resources within each account in your organization. This includes the total amount of data stored and engine usage. + +- **Data storage** usage is calculated on the daily average amount of data (in bytes) stored under your Firebolt account name for indexes and raw compressed data. +- **Engine resources** usage is calculated with **one-second granularity** between the time Firebolt makes the engine available for queries and when the engine moves to the stopped state. + +## [](#set-up-account-billing-through-aws-marketplace)Set-up account billing through AWS Marketplace + +To continue using Firebolt’s engines for query execution after your initial $200 credit, valid for 30 days, you’ll need to set up a billing account by connecting your account to the [AWS Marketplace](https://aws.amazon.com/marketplace). + +**Steps for registration:** + +1. On the [Firebolt Workspace page](https://go.firebolt.io/), select the **Configure**(![AggIndex](../../assets/images/configure-icon.png)) icon from the left navigation pane. +2. Under **Organization settings**, select **Billing**. +3. Click **Connect to AWS Marketplace** to take you to the Firebolt page on AWS Marketplace. +4. On the AWS Marketplace page, click the **View Purchase Options** in the top right hand corner of the screen. +5. 
Click **Setup Your Account**. + +Your account should now be associated with AWS Marketplace. + +## [](#invoices)Invoices + +Invoices for Firebolt engines and data are submitted through the AWS Marketplace. The final monthly invoice is available on the third day of each month through the AWS Marketplace. A billing cycle starts on the first day of the month and finishes on the last day of the same month. + +## [](#viewing-billing-information)Viewing billing information + +Users with the **Org Admin** role can monitor the cost history of each account in the organization. + +**To view cost information for your organization** Organization cost details are captured in two information\_schema tables. Query those two tables and retrieve any information about the organization’s cost +1\) [Engines billing](/sql_reference/information-schema/engines-billing.html) 2) [Storage billing](/sql_reference/information-schema/storage-billing.html) + +Firebolt billing is reported to the AWS Marketplace at the beginning of the next day. By default, the **Accounts & Billing** page displays the engine usage breakdown based on billing time. If you prefer to see the engine usage by actual usage day, you can click the **Engines breakdown** selector under the **Usage cost by engine** table and click **Actual running time**. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_creating_an_organization.md b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_creating_an_organization.md new file mode 100644 index 0000000..7e87995 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_creating_an_organization.md @@ -0,0 +1,36 @@ +# [](#register-to-firebolt)Register to Firebolt + +To start working with Firebolt, you first need to register your organization and create your first account. An organization provides a logical structure for managing accounts, billing, and authentication. 
[Read more about organizations and accounts and their benefits.](/Overview/organizations-accounts.html) + +When registering to Firebolt, the domain name used in your registration email will determine the organization name. Organization names are globally unique — no two organizations can have the same name. If you need two organizations under the same domain, contact the Firebolt Support team for further assistance. + +## [](#create-an-organization)Create an organization + +To register to Firebolt and create an organization: + +1. Go to Firebolt’s registration page: [go.firebolt.io/signup](https://go.firebolt.io/signup) +2. Enter the following information in the form: + + - First name + - Last name + - Email - make sure you use a business email address, such as `you@anycorp.com`. Based on that address, Firebolt infers the name of your company and organization. Firebolt does not support usernames with personal email addresses, such as `me@gmail.com` or `you@outlook.com`. + - Region in which to create your first account. You will be able to create additional accounts in other regions later on, if needed. +3. Click **Register**. +4. An email will be sent to the address provided to verify the organization. When this email is received, click on **Activate**. To move on to the next step, Firebolt will approve your registration request and validate your information - this step might take a couple of minutes to complete. +5. Once approved, you will get a welcome email. Click **Go to Firebolt** in this email. +6. Enter a password as instructed and choose **Set password**. +7. Choose **Log in**. Enter your login information (email address and password) and click **Log in**. + +Congratulations - you have successfully set up your organization. Welcome to Firebolt! + +![Enter Firebolt](../../assets/images/enter_firebolt.png) + +Your organization comes prepared with one account for your convenience - choose your own name or keep the default. 
+ +## [](#next-steps)Next steps: + +- [Manage accounts](/Guides/managing-your-organization/managing-accounts.html) +- [Create logins](/Guides/managing-your-organization/managing-logins.html) or [set up SSO authentication](/Guides/security/sso/) +- [Add users](/Guides/managing-your-organization/managing-users.html) to your account +- [Manage roles](/Guides/security/rbac.html) +- Create databases, engines, and load your data. Follow our [getting started tutorial](/Guides/getting-started/) to try this out with sample data. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_accounts.md b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_accounts.md new file mode 100644 index 0000000..a1114ae --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_accounts.md @@ -0,0 +1,89 @@ +# [](#manage-accounts)Manage accounts + +Your organization comes prepared with one account for your convenience. You can add more accounts, edit existing accounts, or delete accounts using SQL or in the UI. + +To view all accounts, click **Configure** to open the configure space, then choose **Accounts** from the menu, or query the [information\_schema.accounts](/sql_reference/information-schema/accounts.html) view. + +## [](#create-a-new-account)Create a new account + +Creating an account requires the org\_admin role. + +### [](#sql)SQL + +To create an account using SQL, use the [CREATE ACCOUNT](/sql_reference/commands/data-definition/create-account.html) statement. For example: + +``` +CREATE ACCOUNT dev WITH REGION = 'us-east-1'; +``` + +### [](#ui)UI + +To create an account via the UI: + +![Configure > Accounts](/assets/images/accountspage.png) + +1. Click **Configure** to open the configure space, then choose **Accounts** from the menu. +2. From the Accounts management page, choose **Create Account**. Type a name for the account and choose a region. 
You won’t be able to change the region for this account later, so choose carefully. +3. Choose **Create**. + +![Create account](../../assets/images/createaccount.png) + +Then you will see your new account on the **Accounts management** page. + +There can be up to 20 accounts per organization and you can use `CREATE ACCOUNT` 25 times. If you have a need for additional account creations beyond this limit, contact [Firebolt Support](https://docs.firebolt.io/godocs/Reference/help-menu.html) for assistance. Our team can provide guidance and, if appropriate, adjust your account settings to accommodate your needs. + +## [](#edit-an-existing-account)Edit an existing account + +Editing an account requires the account\_admin or org\_admin role. + +### [](#sql-1)SQL + +To edit an existing account using SQL, use the [ALTER ACCOUNT](/sql_reference/commands/data-definition/alter-account.html) statement. For example: + +``` +ALTER ACCOUNT dev RENAME TO staging; +``` + +### [](#ui-1)UI + +To edit an account via the UI: + +1. Click **Configure** to open the configure space, then choose **Accounts** from the menu. +2. Search for the relevant account using the top search filters or by scrolling through the accounts list. Hover over the right-most column to make the account menu appear then choose **Edit account**. Edit the name of the account. +3. Choose **Save**. + +![Edit account](../../assets/images/editaccount.png) + +## [](#delete-an-existing-account)Delete an existing account + +Deleting an account requires the account\_admin or org\_admin role. + +### [](#sql-2)SQL + +To delete an existing account using SQL, use the [DROP ACCOUNT](/sql_reference/commands/data-definition/drop-account.html) statement. For example: + +``` +DROP ACCOUNT dev; +``` + +### [](#ui-2)UI + +To delete an account via the UI: + +1. Click **Configure** to open the configure space, then choose **Accounts** from the menu. +2. 
Search for the relevant account using the top search filters or by scrolling through the accounts list. Hover over the right-most column to make the account menu appear then choose **Delete account**. If your account is not empty (for example, if it contains other objects such as users/databases/engines/etc.), you will need to confirm that you will also delete the sub-objects by selecting **Delete account sub-objects permanently**. +3. Choose **Confirm**. + +![Delete account](../../assets/images/deleteaccount.png) + +The account will be removed from the **Accounts management** page. + +## [](#switch-accounts)Switch accounts + +To switch the account you are using: + +### [](#ui-3)UI + +Click on your login button - the current account will be marked. Choose an account you would like to switch to. + +![Switch account](../../assets/images/switch_account.png) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_logins.md b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_logins.md new file mode 100644 index 0000000..a8eccfc --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_logins.md @@ -0,0 +1,80 @@ +# [](#manage-logins)Manage logins + +Logins are managed at the organization level and are used for authentication. Logins are a combination of a login name (email), first name, last name, and password, unless you’ve configured [Single Sign-On (SSO)](../security/sso/). Moreover, logins can be configured with advanced authentication properties such as [MFA](/Guides/security/enabling-mfa.html) and [network policies](/Guides/security/network-policies.html). Logins are linked to users at the account level, so that roles may be managed separately per account. A user must be linked to either a login or a service account for programmatic use to gain access to Firebolt. You can add, edit or delete logins using SQL or in the UI. 
+ +To view all logins, click **Configure** to open the configure space, then choose **Logins** from the menu, or query the [information\_schema.logins](/sql_reference/information-schema/logins.html) view. + +Managing logins requires the org\_admin role. + +## [](#create-a-new-login)Create a new login + +### [](#sql)SQL + +To create a login using SQL, use the [CREATE LOGIN](/sql_reference/commands/access-control/create-login.html) statement. For example: + +``` +CREATE LOGIN "alexs@acme.com" WITH FIRST_NAME = 'Alex' LAST_NAME = 'Summers'; +``` + +### [](#ui)UI + +To create a login via the UI: + +1. Click **Configure** to open the configure space, then choose **Logins** from the menu: + + + +![Configure > Logins](/assets/images/loginspage.png) + +1. From the Logins management page, choose **Create Login**. +2. Enter the following details: + + - First name: specifies the first name of the user for the login. + - Last name: specifies the last name of the user for the login. + - Login name: specifies the login in the form of an email address. This must be unique within your organization. +3. Optionally, you can: + + - Associate a [network policy](/Guides/security/network-policies.html) with the login by choosing a network policy name under the **Network policy attached** field. + - Enable password login, which specifies if the login can authenticate Firebolt using a password. + - Enable multi-factor authentication (MFA). Read more about how to configure MFA [here](/Guides/security/enabling-mfa.html). + - Set the login as **organisation admin**, which enables fully managing the organization. + +## [](#edit-an-existing-login)Edit an existing login + +### [](#sql-1)SQL + +To edit an existing login using SQL, use the [ALTER LOGIN](/sql_reference/commands/access-control/alter-login.html) statement. For example: + +``` +ALTER LOGIN "alexs@acme.com" SET NETWORK_POLICY = my_network_policy +``` + +### [](#ui-1)UI + +To edit a login via the UI: + +1. 
Click **Configure** to open the configure space, then choose **Logins** from the menu. +2. Search for the relevant login using the top search filters, or by scrolling through the list of logins. Hover over the right-most column to make the login menu appear, then choose **Edit login details**. Edit the desired fields and choose **Save**. + +Login name can not be changed for logins that were provisioned via SSO. + +![Edit login](../../assets/images/editlogin.png) + +## [](#deleting-an-existing-login)Deleting an existing login + +### [](#sql-2)SQL + +To delete an existing login using SQL, use the [DROP LOGIN](/sql_reference/commands/access-control/drop-login.html) statement. For example: + +``` +DROP LOGIN "alexs@acme.com"; +``` + +### [](#ui-2)UI + +To delete a login via the UI: + +1. Click **Configure** to open the configure space, then choose **Logins** from the menu. +2. Search for the relevant login using the top search filters, or by scrolling through the logins list. Hover over the right-most column to make the login menu appear, then choose **Delete login**. + +If the login is linked to any users, deletion will not be permitted. The login must be unlinked from all users before deletion. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_users.md b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_users.md new file mode 100644 index 0000000..1c26d19 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_managing_users.md @@ -0,0 +1,355 @@ +# [](#manage-users-and-roles)Manage users and roles + +In Firebolt, an **organization** can have multiple **accounts**, each serving as a separate workspace for managing resources and data. Within each account, users are created to control access, with their identities defined through logins or service accounts. 
**Logins** are associated with individual human users, each authenticated by unique credentials, allowing them to interact directly with Firebolt’s resources according to assigned roles. **Service accounts** provide programmatic access for applications and automated processes within the account, such as data pipelines or monitoring tools. Each login and service account is linked to specific **roles**, which define their permissions, ensuring that access is managed efficiently and securely across the organization. + +## [](#-logins)![Icon for a Firebolt login for human access.](../../assets/images/icon-login.png) Logins + +A **login** in Firebolt represents a **human user** and is associated with an individual’s credentials, identified by an **email address**. Logins are tied to user roles, which define what the individual can access or modify. A login is primarily used for human authentication and allows a user to access the platform, run queries, and interact with databases and other resources. For instance, a login object might be created for a specific person such as `kate@acme.com`, and this login is linked to roles that control permissions. + +## [](#-service-accounts)![Icon for a Firebolt service account for programmatic access.](../../assets/images/icon-service-account.png) Service accounts + +A **service account** represents a **machine or application** rather than a human user. It allows automated processes to authenticate and interact with Firebolt resources. A service account is used for programmatic access, such as in pipelines, monitoring systems, application data access, and scheduled queries. Service accounts are associated with roles just like logins but are designed to operate without human intervention. For example, a service account might be used for a data pipeline that regularly ingests data into Firebolt. Each service account must be associated with a user. 
For more information about how to create and manage service accounts, see [Manage programmatic access to Firebolt](/Guides/managing-your-organization/service-accounts.html). + +## [](#-users)![Icon for a Firebolt user.](../../assets/images/icon-user-bangs.png) Users + +A **user** is a distinct identity that interacts with the Firebolt platform. Each user is assigned specific **roles**, which determine what actions they can perform and which resources they can access. Users are essential for controlling access in Firebolt and are managed through **role-based access control (RBAC)**. Users authenticate via **logins** or **service accounts**, depending on whether they are human users or machine-based processes. + +A user must be associated with **either** a login or a service account, as follows: + +![A user must be associated with either a login or a service account.](../../assets/images/user_login_service-account.png) + +There can be multiple users per login or service account. Users are managed at the account level, as shown in the following diagram: + +![There can be multiple users per login, for human access, or per service account, for programmatic access](../../assets/images/multiple-users-per-login-or-sa.png) + +You can [add](#set-up-a-new-user), [edit](#edit-an-existing-user) or [delete](#deleting-an-existing-user) users using SQL in the **Develop Space** or using the user interface (UI) in the **Configure Space**. + +Managing roles requires the account\_admin role. For more information about roles, see the [Roles](/Overview/organizations-accounts.html#roles) section in [Organizations and accounts](/Overview/organizations-accounts.html), and the [Account permissions](/Overview/Security/Role-Based%20Access%20Control/account-permissions.html) section of [Role-based access control](/Overview/Security/Role-Based%20Access%20Control/) that specifies permissions for **CREATE USER**. 
+ +**Topics** + +- [Manage users and roles](#manage-users-and-roles) + + - [Logins](#-logins) + - [Service accounts](#-service-accounts) + - [Users](#-users) + - [Set up a new user](#set-up-a-new-user) + + - [Set up a new user for programmatic access](#set-up-a-new-user-for-programmatic-access) + - [Set up a new user for human access](#set-up-a-new-user-for-human-access) + + - [Create a login](#create-a-login) + + - [Create a login using the UI](#create-a-login-using-the-ui) + - [Create a login using SQL](#create-a-login-using-sql) + - [Create a user](#create-a-user) + + - [Create a user using the UI](#create-a-user-using-the-ui) + - [Create a user using SQL](#create-a-user-using-sql) + - [Link the user to the login or service account](#link-the-user-to-the-login-or-service-account) + + - [Link a user using the UI](#link-a-user-using-the-ui) + - [Link a user using SQL](#link-a-user-using-sql) + - [Create a role](#create-a-role) + + - [Create a role using the UI](#create-a-role-using-the-ui) + - [Create a role using SQL](#create-a-role-using-sql) + - [Assign a role to a user](#assign-a-role-to-a-user) + + - [Assign a role using the UI](#assign-a-role-using-the-ui) + - [Assign a role using SQL](#assign-a-role-using-sql) + - [Edit an existing user](#edit-an-existing-user) + + - [Edit a user using the UI](#edit-a-user-using-the-ui) + - [Edit a user using SQL](#edit-a-user-using-sql) + - [Deleting an existing user](#deleting-an-existing-user) + + - [Delete a user using the UI](#delete-a-user-using-the-ui) + - [Delete a user using SQL](#delete-a-user-using-sql) + +## [](#set-up-a-new-user)Set up a new user + +To set up a new user, complete the following steps: + +1. Create a new login or service account. The following section provides information about creating a new login, for human access to Firebolt. 
If you want to set up a new user for programmatic access, see [Create a service account](/Guides/managing-your-organization/service-accounts.html#create-a-service-account). +2. Create a new user. +3. Link the user with a login or a service account. +4. Create a role. +5. Assign the role to the user. + +The following sections guide you through the previous steps. + +### [](#set-up-a-new-user-for-programmatic-access)Set up a new user for programmatic access + +![To set up a new user for programmatic access, first set up a service account.](../../assets/images/workflow-new-user-sa.png) + +To set up a user for programmatic access, [create a service account](/Guides/managing-your-organization/service-accounts.html#create-a-service-account), and then complete the steps in the following sections to [create a user](#create-a-user), [link the user](#link-the-user-to-the-login-or-service-account) to a service account, [create a role](#create-a-role), and [assign the role](#assign-a-role-to-a-user) to the user. + +### [](#set-up-a-new-user-for-human-access)Set up a new user for human access + +#### [](#create-a-login)Create a login + +![To set up a user for human access, first create a login.](../../assets/images/workflow-new-user-create-login.png) + +A login is an **email** that is used for authentication. A login can be associated with multiple accounts. When you set up a new user, you must create either a login or service account for them. Create a login if you want to associate a user with human access to Firebolt. [Create a service account](/Guides/managing-your-organization/service-accounts.html#create-a-service-account) for programmatic access. You will link the user to **either** a login or a service account. + +##### [](#create-a-login-using-the-ui)Create a login using the UI + +Login to [Firebolt’s Workspace](https://go.firebolt.io/login). If you haven’t yet registered with Firebolt, see the [Get Started](/Guides/getting-started/) guide. 
If you encounter any issues, reach out to [support@firebolt.io](mailto:support@firebolt.io) for help. Then, do the following: + +1. Select the Configure icon (![The Firebolt Configure Space icon.](../../assets/images/configure-icon.png)) in the left navigation pane to open the **Configure Space**. +2. Select **Logins**. +3. Select **Create Login**. +4. In the **Create login** window that pops up, enter the following: + + 1. First Name - The first name of the user. + 2. Last Name - The last name of the user. + 3. Login Name - The email address of the user. +5. Select a network policy from the drop-down list. You can choose **Default** or create your own. The default network policy accepts traffic from any IP address. For more about network policies, including how to create a new policy, see [Manage network policies](/Guides/security/network-policies.html). +6. Toggle the following options on or off to select the following: + + 1. Is password enabled - Toggle **on** to require authentication using a password. + 2. Is MFA enabled - Toggle **on** to require authentication using multi-factor authentication (MFA). + 3. Is organization admin - Toggle **on** to grant that login permissions associated with an **Organization Admin**. A user must have organization administrative privileges to manage logins and service accounts. For more information about organization administrative privileges and other roles, see the [Roles](/Overview/organizations-accounts.html#roles) section in [Organizations and accounts](/Overview/organizations-accounts.html). +7. Select **Create**. + +##### [](#create-a-login-using-sql)Create a login using SQL + +Login to [Firebolt’s Workspace](https://go.firebolt.io/login). If you haven’t yet registered with Firebolt, see the [Get Started](/Guides/getting-started/) guide. If you encounter any issues, reach out to [support@firebolt.io](mailto:support@firebolt.io) for help. Then, do the following: + +1. 
Select the **Develop** icon (![The Firebolt Develop Space icon](../../assets/images/develop-icon.png)). + + By default, when you login to **Firebolt’s Workspace** for the first time, Firebolt creates a tab in the **Develop Space** called **Script 1**. The following apply: + + - The database that **Script 1** will use is located directly below the tab name. If you want to change the database, select another database from the drop-down list. + - An engine must be running to process the script in a selected tab. The name and status of the engine that **Script 1** uses for computation is located to the right of the current selected database. If the engine has auto-start set to `TRUE`, it will start from a stopped state. For more information about auto-start, see [Immediately Starting or Automatically Stopping an Engine](/Guides/operate-engines/working-with-engines-using-ddl.html#automatically-start-or-stop-an-engine). +2. Select **system** from the drop-down arrow next to the engine name. The system engine is always running, and you can use it to create a login. You can also use an engine that you create. +3. Use the syntax in the following example code to create a login in the SQL Script Editor: + + ``` + CREATE LOGIN "" + WITH FIRST_NAME = + LAST_NAME = ; + ``` + +#### [](#create-a-user)Create a user + +![To set up a new user, after you create a login, create a user.](../../assets/images/workflow-new-user-create-user.png) + +After you create a login, the next step is to create a user. + +##### [](#create-a-user-using-the-ui)Create a user using the UI + +1. Select the **Govern** icon (![The Firebolt Govern Space icon.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select **Users** from the left sub-menu bar. +3. Select the **+ Create User** button at the top right of the **Govern Space**. +4. In the **Create User** window, enter the following: + + 1. **User name** - The name of the user to associate with the login. 
This name can be any string, excluding spaces, and special characters such as exclamation points (!), percent signs (%), at signs (@), dot signs (.), underscore signs (\_), minus signs (-), and asterisks (\*). + 2. **Assign to** - Use the dropdown to assign the user to one of the following: + i. **Unassigned** - No specific assignment. + + ii. **Login** - Associates the user with a login name or email address. After selecting this option, you will be prompted to choose the login name or email address. + + iii. **Service Account** - Associates the user with a service account. After selecting this option, you will be prompted to choose a service account name. + 3. **Role** - Select the role you want to assign to the user. If no role is specified, the user is automatically granted a [public role](/Overview/organizations-accounts.html#public-role). For more information about roles, see the [Roles](/Overview/organizations-accounts.html#roles) section in [Organization and accounts](/Overview/organizations-accounts.html). + 4. **Default Database** - Choose a database to associate with the user, setting it as their default for access. + 5. **Default Engine** - Choose a default processing engine to associate with the user. +5. Select **Create new user** to save the configuration. 
+ +##### [](#create-a-user-using-sql)Create a user using SQL + +Use the syntax in the following example code and the [CREATE USER](/sql_reference/commands/access-control/create-user.html) statement to create a user in the **SQL Script Editor** in the **Develop Space**: + +``` +CREATE USER <user_name>; +``` + +You can also create a user and link it to a login simultaneously as shown in the following code example: + +``` +CREATE USER <user_name> WITH LOGIN = "<login_name>"; +``` + +Create a user and link it to a service account at the same time as shown in the following code example: + +``` +CREATE USER <user_name> WITH SERVICE_ACCOUNT = <service_account_name> +``` + +#### [](#link-the-user-to-the-login-or-service-account)Link the user to the login or service account + +![To set up a new user, after you create a user, link it to the login or service account.](../../assets/images/workflow-new-user-link-login.png) + +If the user wasn’t associated with a login or service account when they were created, you must link them. + +##### [](#link-a-user-using-the-ui)Link a user using the UI + +1. Select the Govern icon (![The Firebolt Govern Space icon](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select **Users** from the left sub-menu bar. +3. Select the three horizontal dots (…) to the right of the user that you need to link to a login. +4. Select **Edit user details**. +5. If you want to link the user to a login for human access, select **Login** from the drop-down list next to **Assign to**. If you want to link the user to a service account for programmatic access, select **Service Account** from the drop-down list next to **Assign to**. +6. If you want to link the user to a login for human access, select the name of the login to associate with the user from the drop-down list under **Login name**. If you want to link the user to a service account for programmatic access, select a name from the drop-down list next to **Service account name**. 
This drop-down list contains only login accounts that are not already assigned to a user in the current account. +7. Select **Save**. + +##### [](#link-a-user-using-sql)Link a user using SQL + +Use the syntax in the following example code and the [ALTER\_USER](/sql_reference/commands/access-control/alter-user.html) statement to link a user to a login in the **SQL Script Editor** in the **Develop Space**: + +``` +ALTER USER SET LOGIN = ""; +``` + +The following code links a user to a service account: + +``` +ALTER USER SET SERVICE_ACCOUNT = +``` + +#### [](#create-a-role)Create a role + +![To set up a new user, after you link the user, create a role.](../../assets/images/workflow-new-user-create-role.png) + +If you don’t already have a role that you want to assign to a user, you can create a role to define what actions users can perform. For more information, see [Roles](/Overview/organizations-accounts.html#roles). + +##### [](#create-a-role-using-the-ui)Create a role using the UI + +1. Select the Govern icon (![The Firebolt Govern Space icon.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select **Roles** from the left sub-menu bar. +3. Select the **+ New Role** button at the top right of the **Govern Space**. +4. In the left sub-menu bar, enter the following: + + 1. Role name - The name of the role that you want to create. You can use this role to grant privileges for more than one user. +5. Select **Databases** in the left sub-menu bar, and select the following in **Database privileges**: + + 1. **Create database** - Toggle **on** to allow the user to create any database in the account. + 2. **Modify any database** - Toggle **on** to allow the user to modify any database in the account, or keep the option **off** to select the specific database the user can modify. + 3. 
**Usage any database** - Toggle **on** to allow the user to use any database in the account, or keep the option **off** to select the specific database the user can use. + 4. If you didn’t specify using or modifying all databases, select the checkbox next to the specific database that you want to grant the user access to modify or use. +6. Select **Engines** in the left sub-menu bar, and select the following in **Engine privileges**: + + 1. **Create engine** - Toggle **on** to allow the user to create any engine in the account. + 2. **Modify any engine** - Toggle **on** to allow the user to modify any engine in the account, or keep the option **off** to select the specific engine the user can modify. + 3. **Operate any engine** - Toggle **on** to allow the user to stop or start any engine in the account, or keep the option **off** to select the specific engine the user can start or stop. Any running engine that is not the system engine accumulates usage costs. + 4. **Usage any engine** - Toggle **on** to allow the user to use any engine in the account, or keep the option **off** to select the specific engine the user can use. +7. Select **Create**. 
+ +##### [](#create-a-role-using-sql)Create a role using SQL + +Use the syntax in the following example code and the [CREATE ROLE](/sql_reference/commands/access-control/create-role.html) and [GRANT](/sql_reference/commands/access-control/grant.html) statements to create a role in the **SQL Script Editor** in the **Develop Space**: + +``` +CREATE ROLE ; +``` + +Use the following code to grant engine **access to a role**: + +``` +GRANT USAGE ON ENGINE TO +``` + +Use the following code example to grant a role permission to **modify a database**: + +``` +GRANT MODIFY ON DATABASE TO +``` + +Use the following code example to grant a role permission to **create objects inside the public schema**: + +``` +GRANT CREATE ON SCHEMA public TO +``` + +Use the following code to grant a role permission to **access the public schema** in a database: + +``` +GRANT USAGE ON SCHEMA public TO +``` + +Use the following code example to grant a role permission to **read data from a specified table**: + +``` +GRANT SELECT ON TABLE TO +``` + +For more information about role-based access, see [Manage role-based access control](/Guides/security/rbac.html). + +#### [](#assign-a-role-to-a-user)Assign a role to a user + +![To set up a new user, after creating a role, assign it to a user.](../../assets/images/workflow-new-user-assign.png) + +You can assign a new role to the user or change the role assigned to the user from the default **public** role to grant them specific permissions. A user can have multiple roles. + +##### [](#assign-a-role-using-the-ui)Assign a role using the UI + +1. Select the Govern icon (![The Firebolt Govern Space icon.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select **Users** from the left sub-menu bar. +3. Select the three horizontal dots (…) to the right of the user that you need to link to a login. +4. Select **Edit user details**. +5. 
Select the checkbox next to the role that you want to assign to the user from the list under **Assign Roles**. +6. Select **Save**. + +##### [](#assign-a-role-using-sql)Assign a role using SQL + +Use the syntax in the following example code and the [GRANT](/sql_reference/commands/access-control/grant.html) statement to assign a role in the **SQL Script Editor** in the **Develop Space**: + +``` +GRANT TO USER ; +``` + +You can use `GRANT` to assign a role to another role as follows: + +``` +GRANT TO ROLE +``` + +## [](#edit-an-existing-user)Edit an existing user + +You can alter a user’s name, login or service account that they are associated with, their default database, and engine. + +### [](#edit-a-user-using-the-ui)Edit a user using the UI + +1. Select the Govern icon (![The Firebolt Govern Space icon.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select **Users** from the left sub-menu bar. +3. Select the three horizontal dots (…) to the right of the user that you need to edit. +4. Select **Edit user details**. +5. Edit the desired fields. +6. Select **Save**. + +### [](#edit-a-user-using-sql)Edit a user using SQL + +Use the [ALTER USER](/sql_reference/commands/access-control/alter-user.html) statement to change a user’s information in the **SQL Script Editor** in the **Develop Space**. + +The following code example changes a user’s name: + +``` +ALTER USER "alex" RENAME TO "alexs"; +``` + +The following code example changes a user’s login: + +``` +ALTER USER alex SET LOGIN="alexs@acme.com"; +``` + +Users can modify most of their own account settings without requiring [RBAC](/Overview/Security/Role-Based%20Access%20Control/#role-based-access-control-rbac) permissions, except when altering [LOGIN](/Guides/managing-your-organization/managing-logins.html) configurations or a [SERVICE ACCOUNT](/Guides/managing-your-organization/service-accounts.html). 
+ +## [](#deleting-an-existing-user)Deleting an existing user + +You can delete a user using either the UI or with SQL. The delete operation is irreversible. + +### [](#delete-a-user-using-the-ui)Delete a user using the UI + +1. Select **Users** from the left sub-menu bar. +2. Select the three horizontal dots (…) to the right of the user that you need to delete. +3. Select **Delete user**. +4. Select **Confirm** to delete the user. This operation is irreversible. + +### [](#delete-a-user-using-sql)Delete a user using SQL + +Use the syntax in the following example code and the [DROP USER](/sql_reference/commands/access-control/drop-user.html) statement to delete an existing user in the **SQL Script Editor** in the **Develop Space**: + +``` +DROP USER "alex"; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_service_accounts.md b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_service_accounts.md new file mode 100644 index 0000000..d28a1b1 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_managing_your_organization_service_accounts.md @@ -0,0 +1,219 @@ +# [](#manage-programmatic-access-to-firebolt)Manage programmatic access to Firebolt + +Service accounts in Firebolt are used exclusively for **programmatic access**, allowing applications, scripts, or automated systems to securely interact with Firebolt resources. Unlike regular logins for individuals, each service account has an ID and a secret for authentication. + +To manage service accounts, you must have the **organization admin** role, which grants full administrative control over an organization in Firebolt, including managing logins, network policies, and accounts. This role ensures proper access management, security, and compliance with organizational policies. + +Administrators use service accounts to control how external tools and applications access Firebolt, ensuring access is limited to necessary resources. 
Service accounts are associated with specific users within the organization, giving administrators control over what data and permissions they have. This helps enforce security rules, track usage, and audit system access in a clear and controlled way. + +You can access a Firebolt database programmatically using either of the following: + +- The [Firebolt API](https://docs.firebolt.io/godocs/Guides/query-data/using-the-api.html#firebolt-api) - directly interacts with Firebolt’s data warehouse using HTTP requests. +- The [Firebolt drivers](https://docs.firebolt.io/godocs/Guides/developing-with-firebolt/) - use a third party tool or programming language to integrate with Firebolt’s data warehouse. Firebolt supports several languages including Python, Node, .Net, and Go. + +Service accounts must be manually linked to a [user account](https://docs.firebolt.io/godocs/Guides/managing-your-organization/managing-users.html) after they have been created. The service account provides access to the organization, and the associated user provides access to an account within the organization. To use Firebolt programmatically, you must authenticate with an ID and a secret. These are generated when you create a service account. You can add, delete and generate secrets for service accounts using SQL scripts in the **Develop Space** or through the user interface (UI) in the **Configure Space**. + +Follow these steps to **gain programmatic access to Firebolt**: + +1. [Create a service account](#create-a-service-account). +2. [Get a service account ID](#get-a-service-account-id). +3. [Generate a secret](#generate-a-secret). +4. [Create a user](#create-a-user). + +After completing the previous steps, the following sections show you how to **manage programmatic access through your service account**: + +1. [Test your new service account](#test-your-new-service-account). +2. [Edit your service account](#edit-your-service-account). +3. 
[Delete your service account](#delete-your-service-account). + +## [](#create-a-service-account)Create a service account + +![The first step to obtain programmatic access is to create a service account.](../../assets/images/service_accounts_create.png) + +You can create a service account using SQL scripts in the **Develop Space** or through the user interface (UI) in the **Configure Space**. + +### [](#create-a-service-account-using-the-ui)Create a service account using the UI + +Login to Firebolt’s [Workspace](https://go.firebolt.io/login). If you haven’t yet registered with Firebolt, see the [Get Started](https://docs.firebolt.io/Guides/getting-started/) guide. If you encounter any issues, reach out to support@firebolt.io for help. Then, do the following: + +1. Select the **Configure** icon (![The icon to open the Configure Space.](../../assets/images/configure-icon.png)) in the left navigation pane to open the **Configure Space**. +2. Select Service accounts on the left sub-menu bar. +3. Select the **+ Create a service account** button at the top right of the **Configure Space**. +4. In the **Create a service account** window that appears, enter the following: + + - Name - The name of the service account. + - [Network policy](https://docs.firebolt.io/Guides/security/network-policies.html) - A security feature that defines a list of allowed and blocked IP addresses or ranges to manage access at the organization level, login level, or for service accounts. + - Description - A description for the service account. +5. Toggle **Is organization admin** to designate the service account as an account with administrative privileges in your organization. In Firebolt, the organization admin role provides full administrative privileges over the organization, allowing management of users, service accounts, network policies, and other organization-wide settings. +6. Select **Create** to finish creating the service account. 
+ +### [](#create-a-service-account-using-sql)Create a service account using SQL + +Login to Firebolt’s [Workspace](https://go.firebolt.io/login). If you haven’t yet registered with Firebolt, see [Get Started](https://docs.firebolt.io/Guides/getting-started/). If you encounter any issues, reach out to support@firebolt.io for help. Then, do the following: + +1. Select the **Develop** icon (![The icon to open the Develop Space.](../../assets/images/develop-icon.png)). +2. By default, when you login to **Firebolt’s Workspace** for the first time, Firebolt creates a tab in the **Develop Space** called **Script 1**. The following apply: + + - The database that **Script 1** will run using is located directly below the tab name. If you want to change the database, select another database from the drop-down list. + - An engine must be running to process the script in a selected tab. The name and status of the engine that **Script 1** uses for computation is located to the right of the current selected database. + + Select **system** from the drop-down arrow next to the engine name. The system engine is always running, and you can use it to create a service account. You can also use an engine that you create. +3. Use the syntax in the following example code to create a service account in the **SQL Script Editor**: + + ``` + CREATE SERVICE ACCOUNT IF NOT EXISTS "service_account_name" WITH DESCRIPTION = 'service account 1'; + ``` + + For more information, see the [CREATE SERVICE ACCOUNT](https://docs.firebolt.io/sql_reference/commands/access-control/create-service-account.html) command. + +## [](#get-a-service-account-id)Get a service account ID + +![The second step to obtain programmatic access is to get a service account ID.](../../assets/images/service_accounts_id.png) + +Your new service account is listed in the **Configure Space** in the **Service accounts** on the left sub-menu bar. 
Note the ID of this service account under the **ID** column in the **Service accounts management** table. You will use this ID for authentication. + +## [](#generate-a-secret)Generate a secret + +![The third step to obtain programmatic access is to generate a secret.](../../assets/images/service_accounts_secret.png) + +Each service account requires a secret to access Firebolt programmatically. You can generate a secret using SQL scripts in the **Develop Space** or through the UI in the **Configure Space**. + +If you generate a new secret, the previous secret for your service account will no longer work inside your applications or services. + +### [](#generate-a-secret-using-the-ui)Generate a secret using the UI + +1. Select the **Configure** icon (![The icon to open the Configure Space.](../../assets/images/configure-icon.png)) in the left navigation pane to open the **Configure Space**. +2. Select **Service accounts** from the left sub-menu bar. +3. Select the three horizontal dots (…) to the right of the service account that you want to generate a secret. +4. Select **Create a new secret**. +5. Select the copy icon from the pop-up window **New secret for service account** that displays the new secret to copy the secret to your clipboard. This secret is not stored anywhere. Once you close the pop-up window, you will no longer be able to retrieve this secret. + +### [](#generate-a-secret-using-sql)Generate a secret using SQL + +Use the syntax in the following example code to generate a secret for a service account in the **SQL Script Editor** in the **Develop Space**: + +``` +CALL fb_GENERATESERVICEACCOUNTKEY('service_account_name') +``` + +The `CALL fb_GENERATESERVICEACCOUNTKEY` command in the previous code example returns both the service account ID and secret. Once you retrieve this secret, you cannot retrieve it again later. 
+ +## [](#create-a-user)Create a user + +![The last step to obtain programmatic access is to create a user.](../../assets/images/service_accounts_user.png) + +Once you create the service account, it must be associated with a user. Your organization may have multiple Firebolt accounts, each with its own set of resources, databases, and users. Each service account can only be linked to one user per Firebolt account, but it can be assigned to different users across multiple accounts. This setup allows the service account to work across multiple accounts, while ensuring it is linked to only one user per account. + +You can create a user using SQL scripts in the **Develop Space** or through the UI in the **Govern Space**. + +### [](#create-a-user-using-the-ui)Create a user using the UI + +1. Select the Govern icon (![The icon to open the Govern Space.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select Users from the left sub-menu bar. +3. Select the **+ Create User** button at the top right of the **Govern Space**. +4. In the **Create User** window, enter the following: + + - **User Name** - The name of the user to associate with the service account. + - **Default Database** - (Optional) The name of the database that is associated with the user. + - **Default Engine** - (Optional) The name of the engine that is associated with the user. + + Toggle the radio button next to **Associate a service account**. +5. Select the name of the service account to associate with the user from the drop-down list under **Service Account Associated**. This drop-down list contains only service accounts that are not already assigned to a user in the current account. 
+
+### [](#create-a-user-using-sql)Create a user using SQL
+
+Use the syntax in the following example code to create a user and associate it with a service account in the **SQL Script Editor** in the **Develop Space**:
+
+```
+CREATE USER alex WITH SERVICE_ACCOUNT = service_account_name;
+```
+
+The previous code example creates a user with the username `alex`, and associates it with a service account by its `service_account_name`.
+
+For more information, see [Manage users](https://docs.firebolt.io/godocs/Guides/managing-your-organization/managing-users.html#create-a-new-user).
+
+## [](#test-your-new-service-account)Test your new service account
+
+Once you have set up your service account, use the following code example to send a request to Firebolt’s REST API, and receive an authentication token:
+
+```
+curl -X POST --location 'https://id.app.firebolt.io/oauth/token' \
+--header 'Content-Type: application/x-www-form-urlencoded' \
+--data-urlencode 'grant_type=client_credentials' \
+--data-urlencode 'audience=https://api.firebolt.io' \
+--data-urlencode "client_id=${service_account_id}" \
+--data-urlencode "client_secret=${service_account_secret}"
+```
+
+In the previous code example, use the service account ID and secret from the previous **Generate a secret** step for `service_account_id` and `service_account_secret`.
+
+The following is an example response to the REST API request:
+
+**Response:**
+
+```
+{
+ "access_token":"eyJz93a...k4laUWw",
+ "token_type":"Bearer",
+ "expires_in":86400
+}
+```
+
+In the previous example response, the following apply:
+
+- The `access_token` is a unique token that authorizes your API requests and acts as a temporary key to access resources or perform actions. You can use this token to authenticate with Firebolt’s platform until it expires.
+- The `token_type` is `Bearer`, which means that the access token must be included in an authorization header of your API requests using the format: `Authorization: Bearer <access_token>`.
+- The token `expires_in` indicates the number of seconds until the token expires. + +Use the returned `access_token` to authenticate with Firebolt. + +## [](#edit-your-service-account)Edit your service account + +You can edit your service account using SQL scripts in the **Develop Space** or through the UI in the **Configure Space**. + +### [](#edit-your-service-account-using-the-ui)Edit your service account using the UI + +1. Select **Configure** icon (![The icon to open the Configure Space.](../../assets/images/configure-icon.png)) in the left navigation pane to open the **Configure Space**. +2. Select **Service accounts** from the left sub-menu bar. +3. Select the three horizontal dots (…) to the right of the service account that you want to edit. +4. Select **Edit service account**. + + In the **Edit service account** pop-up window, you can edit the following: + + - **Name** - The name of the service account. + - **Network policy** - The network policy associated with the service account that defines whether an IP address is allowed or blocked from interacting with Firebolt resources. + - **Description** - The description of the service account. + - **Is organization admin** - Toggle on or off to identify the service account as an organizational admin. + + Select **Save** to keep your edits. + +### [](#edit-your-service-account-using-sql)Edit your service account using SQL + +Use [ALTER SERVICE ACCOUNT](https://docs.firebolt.io/sql_reference/commands/access-control/alter-service-account.html), as shown in the following example to edit a service account in the **SQL Script Editor** in the **Develop Space**: + +``` +ALTER SERVICE ACCOUNT service_account_name SET NETWORK_POLICY = my_network_policy +``` + +In the previous code example, the service account’s network policy is set to a new value. 
+ +## [](#delete-your-service-account)Delete your service account + +You can delete your service account using SQL scripts in the **Develop Space** or through the UI in the **Configure Space**. + +{: .note} You can’t delete a service account if it is linked to users. You must first unlink the service account from all users. You can view all users linked to a service account by navigating to the **Users** section in the **Govern Space**. In the **Users Management** table, each **User Name** has the name of a **Service Account** if it is associated with one. To unlink a user account, select the three horizontal dots (…) to the right of the **User Name**, and select **Edit user details**. Then, toggle off **Associate a service account**. + +### [](#delete-your-service-account-using-the-ui)Delete your service account using the UI + +1. Select the **Configure** icon (![The icon to open the Configure Space.](../../assets/images/configure-icon.png)) in the left navigation pane to open the **Configure Space**. +2. Select **Service accounts** from the left sub-menu bar. +3. Select the three horizontal dots (…) to the right of the service account that you want to delete. +4. Select **Delete service account**. 
+ +### [](#delete-your-service-account-using-sql)Delete your service account using SQL + +Use [DROP SERVICE ACCOUNT](https://docs.firebolt.io/sql_reference/commands/access-control/drop-service-account.html), as shown in the following example to delete a service account in the **SQL Script Editor** in the **Develop Space**: + +``` +DROP SERVICE ACCOUNT service_account_name; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_operate_engines.md b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_operate_engines.md new file mode 100644 index 0000000..352cd8a --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_operate_engines.md @@ -0,0 +1,10 @@ +# [](#operate-engines)Operate engines + +Learn how to work with engines using both the UI and SQL, how to size and monitor engines, how to use RBAC to govern engines and how to use the system engine. + +* * * + +- [Work with engines](/Guides/operate-engines/working-with-engines-using-ddl.html) +- [Sizing Engines](/Guides/operate-engines/sizing-engines.html) +- [System Engine](/Guides/operate-engines/system-engine.html) +- [Governing Engines](/Guides/operate-engines/rbac-for-engines.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_rbac_for_engines.md b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_rbac_for_engines.md new file mode 100644 index 0000000..a1b20ae --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_rbac_for_engines.md @@ -0,0 +1,59 @@ +# [](#governing-engines)Governing Engines + +Use [Role Based Access Control](/Guides/security/rbac.html) (RBAC) to granularly control which users within an account can create new engines, use, operate, monitor and modify existing engines. Accordingly, Firebolt provides CREATE, USAGE, OPERATE, MONITOR and MODIFY permissions to control these actions. 
You can use RBAC to control whether a user has permissions to perform these actions for specific engines or for all engines in a given account. Note that permissions for CREATE ENGINE can only be granted at the account level.
+
+Follow the steps below to control what permissions a user has for a given engine or for any engine within an account:
+
+- Create a new role
+- Grant permissions to the role
+- Assign role to a user
+
+**Example 1:** We want to provide a user kate with permissions to create and operate engines
+
+```
+CREATE ROLE prodAdminRole;
+
+GRANT CREATE ENGINE ON ACCOUNT myAccount IN ORGANIZATION myOrg TO prodAdminRole;
+
+GRANT OPERATE ON ENGINE myEngine IN ACCOUNT myAccount TO prodAdminRole;
+
+GRANT ROLE prodAdminRole TO USER kate;
+```
+
+**Example 2:** We want to provide a user kate with permissions to only use and operate engines
+
+```
+CREATE ROLE prodAdminRole;
+
+GRANT USAGE ON ENGINE myEngine IN ACCOUNT myAccount TO prodAdminRole;
+
+GRANT OPERATE ON ENGINE myEngine IN ACCOUNT myAccount TO prodAdminRole;
+
+GRANT ROLE prodAdminRole TO USER kate;
+```
+
+**Example 3:** We want to provide a user kate with permissions to use, operate and monitor engine metrics
+
+```
+CREATE ROLE prodAdminRole;
+
+GRANT USAGE ON ENGINE myEngine IN ACCOUNT myAccount TO prodAdminRole;
+
+GRANT MONITOR USAGE ON ENGINE myEngine IN ACCOUNT myAccount TO prodAdminRole;
+
+GRANT OPERATE ON ENGINE myEngine IN ACCOUNT myAccount TO prodAdminRole;
+
+GRANT ROLE prodAdminRole TO USER kate;
+```
+
+**Example 4:** We want to provide a user kate with permissions to create and modify engines
+
+```
+CREATE ROLE prodAdminRole;
+
+GRANT CREATE ENGINE ON ACCOUNT myAccount IN ORGANIZATION myOrg TO prodAdminRole;
+
+GRANT MODIFY ON ENGINE myEngine IN ACCOUNT myAccount TO prodAdminRole;
+
+GRANT ROLE prodAdminRole TO USER kate;
+```
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_sizing_engines.md
b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_sizing_engines.md
new file mode 100644
index 0000000..6e16f03
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_sizing_engines.md
@@ -0,0 +1,45 @@
+# [](#sizing-engines)Sizing Engines
+
+Selecting an appropriate engine size for your workload depends on multiple factors such as the size of your active dataset, latency and throughput requirements of your workload, your considerations for price-performance and the number of users and queries your workload is expected to handle concurrently. Our guidance is to start small with an engine size that fits your active dataset and monitor the workload using the engine observability metrics (see below). Based on these metrics, you can then dynamically resize your engine to meet the needs of your workload.
+
+If your workload requires high processing power relative to data size, use a compute-optimized node type. These nodes have approximately the same processing power as storage-optimized nodes but have less memory, cache space, and cost.
+
+## [](#dimensions-of-engine-sizing)Dimensions of engine sizing
+
+Firebolt allows you to change:
+
+- The type of nodes in your engine.
+- The compute family of nodes in your engine.
+- The number of nodes in a cluster of your engine.
+- The number of copies of that cluster in your engine.
+
+See the [engine fundamentals](/Overview/engine-fundamentals.html) page for details.
+
+## [](#using-observability-metrics-to-resize-an-engine)Using Observability Metrics to Resize an Engine
+
+Firebolt provides engine observability metrics that give visibility into how the engine resources are being utilized by your workloads. Use the [Information\_Schema.engine\_metrics\_history](/sql_reference/information-schema/engine-metrics-history.html) view to understand how much CPU, RAM, and disk are utilized by your workloads.
In addition, this view also provides details on how often your queries hit the local cache and how much of your query data is spilling onto the disk. These metrics can help you decide whether your engine needs a different node type and whether you need to add more nodes to improve the query performance. Use the [Information\_Schema.engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) view to understand how many queries are waiting in the queue to be run. If there are a number of queries still waiting to be run, adding another cluster to your engine may help improve the query throughput.
+
+## [](#initial-sizing)Initial Sizing
+
+**ELT Workloads**
+
+For the ELT workloads, the engine size would depend on the number of files and the size of the files used to ingest the data. You can parallelize the ingest process with additional nodes, which can provide improved performance.
+
+**Queries**
+
+To correctly size an engine for querying data, there are several factors to consider:
+
+- The size of frequently accessed data under your query pattern. More data will require an engine with a larger cache size.
+- The relative amount of processing performed within the queries in your query pattern. More complex queries will generally require more CPU cores.
+- The Queries Per Second (QPS) of your workload. At higher QPS you may need to enable auto-scaling or multiple clusters on your engine.
+- The number of requests outstanding or the time submitted queries run. Longer running queries may raise the requirements of the engine to need instance types with more memory or more clusters in the engine.
+
+For query processing, our recommendation is to start with an S or M storage-optimized instance type. Then, run a checksum over the dataset you expect to be queried frequently. Firebolt Engines cache the data locally, which helps serve queries at low latencies.
The cache size provided by the engines varies depending on the type of node used in your engines, with each size having twice the cache of the next smallest size. Compute-optimized instances have approximately one quarter of the cache size of storage-optimized instances. After the checksum, you can use [Information\_Schema.engine\_metrics\_history](/sql_reference/information-schema/engine-metrics-history.html) to see the cache utilization percentage. If an acceptable percentage of your active dataset fits, you can then run queries at your expected QPS on the engine. + +**TIP:** You can use [Multi-Cluster Engine Warmup](/Reference/system-settings.html#multi-cluster-engine-warmup) to submit your checksum queries to all clusters in a multi-cluster engine. + +Small and medium storage-optimized engines are available for use right away. Compute-optimized instance types are available, but may see longer engine start times. If you want to use a large or extra-large engine, reach out to [support@firebolt.io](mailto:support@firebolt.io). + +**TIP:** You also have the option to run your workload simultaneously on engines with different configurations and use these metrics to identify which configuration best fits your needs. + +You will need to have the appropriate [RBAC](/Guides/operate-engines/rbac-for-engines.html) permissions to use the engine observability metrics. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_system_engine.md b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_system_engine.md new file mode 100644 index 0000000..92f9b88 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_system_engine.md @@ -0,0 +1,111 @@ +# [](#system-engine)System Engine + +Firebolt’s system engine enables running various metadata-related queries without having to start an engine. The system engine is always available for you in all databases to select and use. 
+ +The system engine supports running the following commands: + +- All [access control](/sql_reference/commands/access-control/) commands +- All [engine](/sql_reference/commands/engines/) commands +- Most [data definition](/sql_reference/commands/data-definition/) commands. The following commands are not supported: + + - [ALTER TABLE DROP PARTITION](/sql_reference/commands/data-definition/alter-table.html) + - [CREATE AGGREGATING INDEX](/sql_reference/commands/data-definition/create-aggregating-index.html) + - [CREATE EXTERNAL TABLE](/sql_reference/commands/data-definition/create-external-table.html) + - [CREATE TABLE AS SELECT](/sql_reference/commands/data-definition/create-fact-dimension-table-as-select.html) +- Most [metadata](/sql_reference/commands/metadata/) commands. The following commands are not supported: + + - [SHOW CACHE](/sql_reference/commands/metadata/show-cache.html) +- Non-data-accessing [SELECT](/sql_reference/commands/queries/select.html) queries like `SELECT CURRENT_TIMESTAMP()` +- [SELECT](/sql_reference/commands/queries/select.html) queries on some [information\_schema](/sql_reference/information-schema/) views: + + - [information\_schema.accounts](/sql_reference/information-schema/accounts.html) + - [information\_schema.applicable\_roles](/sql_reference/information-schema/applicable-roles.html) + - [information\_schema.transitive\_applicable\_roles](/sql_reference/information-schema/transitive-applicable-roles.html) + - [information\_schema.columns](/sql_reference/information-schema/columns.html) + - [information\_schema.catalogs](/sql_reference/information-schema/catalogs.html) + - [information\_schema.enabled\_roles](/sql_reference/information-schema/enabled-roles.html) + - [information\_schema.engines](/sql_reference/information-schema/engines.html) + - [information\_schema.indexes](/sql_reference/information-schema/indexes.html) + - [information\_schema.logins](/sql_reference/information-schema/logins.html) + - 
[information\_schema.network\_policies](/sql_reference/information-schema/network_policies.html) + - [information\_schema.service\_accounts](/sql_reference/information-schema/service-accounts.html) + - [information\_schema.tables](/sql_reference/information-schema/tables.html) + - [information\_schema.users](/sql_reference/information-schema/users.html) + - [information\_schema.views](/sql_reference/information-schema/views.html) + +## [](#using-the-system-engine-via-the-firebolt-manager)Using the system engine via the Firebolt manager + +1. In the Firebolt manager, choose the Databases icon in the navigation pane. +2. Click on the SQL Workspace icon for the desired database. In case you have no database in your account - create one first. +3. From the engine selector in the SQL Workspace, choose System Engine, then run one of the supported queries. + +## [](#using-the-system-engine-via-sdks)Using the system engine via SDKs + +### [](#python-sdk)Python SDK + +Connect via the connector without specifying the engine\_name. Database parameter is optional. + +System engine does not need a database defined. If you wish to connect to an existing database and run metadata queries with the system engine, just specify the name of your database. + +**Example** + +``` +from firebolt.db import connect +from firebolt.client import DEFAULT_API_URL +from firebolt.client.auth import ClientCredentials + +client_id = "" +client_secret = "" +account_name = "" + +with connect( + database="", # Omit this parameter if you don't need db-specific operations + auth=ClientCredentials(client_id, client_secret), + account_name=account_name, + api_endpoint=DEFAULT_API_URL, +) as connection: + + cursor = connection.cursor() + + cursor.execute("SHOW CATALOGS") + + print(cursor.fetchall()) +``` + +Guidance on creating service accounts can be found in the [service account](/Guides/managing-your-organization/service-accounts.html) section. 
+ +### [](#other-sdks)Other SDKs + +Any other Firebolt connector can also be used similarly, as long as the engine name is omitted. + +## [](#system-engine-limitations)System Engine Limitations + +### [](#supported-queries-for-system-engine)Supported queries for system engine + +System engine only supports running the metadata-related queries listed above. Additional queries will be supported in future versions. + +### [](#rate-limits-for-system-engines)Rate Limits for System Engines + +To ensure fair and consistent access to the System Engine for all users, we have introduced rate limits that govern resource usage per account. These limits are designed to prevent resource contention and ensure optimal performance for everyone. + +When the rate limits are exceeded on the system engine, the system will return the following error: `429: Account system engine resources usage limit exceeded`. This error typically occurs when an account submits an excessive number of queries or executes highly complex queries that surpass the allocated resource thresholds. + +**What to Do If You Encounter Rate Limits** + +If you receive the 429 error, consider these steps to resolve the issue: + +- Switch to a User Engine: Offload your workloads to a dedicated User Engine if possible. User Engines do not have the same rate limits, making them better suited for higher workloads or complex operations. +- Review your query patterns and ensure they are not unnecessarily complex or resource-intensive. Use best practices to write efficient queries that minimize resource consumption. +- Contact Support: If you believe your account has been rate-limited unfairly or you anticipate requiring higher limits, reach out to our support team to discuss adjusting your account’s thresholds. + +**Best Practices to Avoid Rate Limits** + +- Avoid running multiple concurrent queries that heavily use system resources. +- Leverage Firebolt’s indexing and other optimization features to streamline your queries. 
+- Regularly audit your workloads and usage patterns to align with the system’s best practices. + +**Why This Matters** + +These rate limits are critical for maintaining a fair and robust environment where all users can achieve reliable performance without disruption from resource-heavy neighbors. This measure aligns with our commitment to delivering consistent and high-quality service across all accounts. + +For additional support or questions, please contact our support team or refer to our documentation on optimizing query performance. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_working_with_engines_using_ddl.md b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_working_with_engines_using_ddl.md new file mode 100644 index 0000000..ef2ba5c --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_operate_engines_working_with_engines_using_ddl.md @@ -0,0 +1,204 @@ +# [](#work-with-engines)Work with engines + +You can create, run, modify, and scale Firebolt engines using either the **Firebolt Workspace** [user interface](/Guides/query-data/using-the-develop-workspace.html) (UI) or the [Firebolt API](/API-reference/). Learn how to perform key engine operations, including starting, stopping, resizing, and configuring auto-start/stop settings, using both the UI and SQL commands. Firebolt also allows the dynamic scaling of engines without stopping them. + +All the engine operations in this guide can be performed using a [system engine](/Guides/operate-engines/system-engine.html). + +Topics: + +- [Create engines](#create-engines) – Learn how to create an engine. +- [Start or resume an engine](#start-or-resume-an-engine) – Learn how to start or resume an engine. +- [Stop an engine](#stop-an-engine) – Learn how to stop an engine either gracefully or immediately. +- [Resize engines](#resize-engines) – Learn how to scale engines up or down by adjusting the node type or number of nodes. 
+- [Concurrency auto-scaling](#concurrency-auto-scaling) – Learn how to enable auto-scaling for engines to automatically adjust the number of clusters based on workload. +- [Automatically start or stop an engine](#automatically-start-or-stop-an-engine) – Learn how to configure engines to start and stop automatically based on specific conditions. + +## [](#create-engines)Create engines + +You can create an engine using SQL scripts or through the UI in the **Develop Space**. + +### [](#create-an-engine-using-the-ui)Create an engine using the UI + +1. Login to the [Firebolt Workspace](https://firebolt.go.firebolt.io/signup). +2. Select the **Develop Space** icon (</>) from the left navigation bar. +3. Select the red plus (+) button from the top of the left navigation bar. +4. Select **Create new engine**. + ![](/assets/images/Engine_Create_Popup.png) +5. Enter the engine name, type, and number of nodes. + ![](/assets/images/Create_Engine_Dialog.png) +6. Select **Create new engine**. + +### [](#create-an-engine-using-the-api)Create an engine using the API + +To create an engine, use [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html). + +The following code example creates an engine with one cluster containing two nodes of type `S`: + +``` +CREATE ENGINE myengine; +``` + +The following code example creates an engine with two nodes of type `M`: + +``` +CREATE ENGINE myengine WITH +TYPE="M" NODES=2 CLUSTERS=1; +``` + +When creating an engine using the UI, Firebolt preserves the exact capitalization of the engine name. For example, an engine named **MyEngine** will retain its casing. To reference this engine in SQL commands, enclose the name in quotes: “MyEngine”. For more information, visit the [Object Identifiers](/Reference/object-identifiers.html) page. + +## [](#start-or-resume-an-engine)Start or resume an engine + +### [](#start-an-engine-using-the-ui-)Start an engine using the UI + +1. In the **Engines** list, find the engine you want to start. +2. 
Open the dropdown menu next to the engine and select **Start engine**.
+ ![](/assets/images/Start_Engine.png)
+3. The engine status changes to **Running** once started.
+
+### [](#start-an-engine-using-the-api-)Start an engine using the API
+
+To start your engine, use the [START ENGINE](/sql_reference/commands/engines/start-engine.html) command:
+
+```
+START ENGINE myengine;
+```
+
+## [](#stop-an-engine)Stop an engine
+
+### [](#stop-an-engine-using-the-ui-)Stop an engine using the UI
+
+1. In the **Engines** list, find the engine you want to stop.
+2. Open the dropdown menu and select **Stop engine**.
+ ![](/assets/images/Stop_Engine.png)
+
+### [](#stop-an-engine-using-the-api-)Stop an engine using the API
+
+To stop an engine, use the [STOP ENGINE](/sql_reference/commands/engines/stop-engine.html) command:
+
+```
+STOP ENGINE myengine;
+```
+
+To stop an engine immediately without waiting for running queries to complete, use:
+
+```
+STOP ENGINE myengine WITH TERMINATE=TRUE;
+```
+
+Stopping an engine clears its cache. Queries run after restarting will experience a cold start, potentially impacting performance until the cache is rebuilt.
+
+## [](#resize-engines)Resize engines
+
+### [](#scale-engines-up-or-down-using-the-ui-)Scale engines up or down using the UI
+
+1. In the **Engines** list, find the engine to modify.
+2. Open the dropdown menu and select the **More options** icon (![More options icon](../../assets/images/more_options_icon.png)).
+3. Choose **Modify engine**.
+ ![](/assets/images/Alter_Engine_Popup.png)
+4. Choose the new node type and select **Modify engine**.
+ ![](/assets/images/Modify_Engine_Type.png)
+
+### [](#scale-engines-up-or-down-using-the-api-)Scale engines up or down using the API
+
+Use the [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html) command to change the node type:
+
+```
+ALTER ENGINE my_prod_engine SET TYPE = "M";
+```
+
+The previous example updates all nodes in the engine to use the ‘M’ type.
+ +### [](#scale-engines-out-or-in-using-the-ui)Scale engines out or in using the UI + +1. In the **Engines** list, find the engine to modify. +2. Open the dropdown menu, select the **More options** icon (![More options icon](../../assets/images/more_options_icon.png)), and choose **Modify engine**. + ![](/assets/images/Alter_Engine_Popup.png) +3. Adjust the number of nodes using the (-) and (+) buttons. + +### [](#scale-engines-out-or-in-using-the-api)Scale engines out or in using the API + +Use the [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html) command to change the number of nodes: + +``` +ALTER ENGINE my_prod_engine SET NODES = 3; +``` + +The previous example updates the engine so that it uses three nodes. + +## [](#concurrency-auto-scaling)Concurrency auto-scaling + +You can use the `MIN_CLUSTERS` and `MAX_CLUSTERS` parameters to enable auto-scaling and allow the engine to adjust the number of clusters based on workload. Firebolt scales the clusters between the defined minimum and maximum based on engine CPU usage, time in the queue, and other factors that vary with demand. Auto-scaling helps your engine adapt to fluctuating workloads, improving performance, minimizing delays during high demand, avoiding bottlenecks, ensuring consistent query response times, and optimizing resource utilization for a more cost-effective solution. + +To use auto-scale, do the following: + +1. Create an engine with `MIN_CLUSTERS` set to a value and `MAX_CLUSTERS` set to a value higher than `MIN_CLUSTERS` as shown in the following code example: + + ``` + CREATE ENGINE your_engine with MIN_CLUSTERS = 1 MAX_CLUSTERS = 2; + ``` + + In the previous code example, If `MIN_CLUSTERS` has the same value as `MAX_CLUSTERS`, auto-scaling is not enabled. +2. Check the `information_schema.engines` view to check how many clusters are being used by your engine. 
The following code example returns the number of `CLUSTERS`, `MIN_CLUSTERS`, and `MAX_CLUSTERS` from the specified engine: + + ``` + SELECT CLUSTERS, MIN_CLUSTERS, MAX_CLUSTERS + FROM information_schema.engines WHERE engine_name = 'your_engine' + ``` + + You can also select the **Engine monitoring** tab at the bottom of the **SQL script editor** in the **Develop Workspace** as shown in the following image: + + ![Icon showing the engine monitoring tab selected in the Firebolt Develop Workspace.](../../assets/images/icon-engine-monitoring.png) + + The **Engine monitoring** tab displays CPU, memory, and disk use, cache reads, number of running and suspended queries, and spilled bytes. +3. Test auto-scaling by running a query that overloads a single cluster, then check `information_schema.engines` to observe the change in the `CLUSTERS` value. You can use any query to test this functionality as long as it can overload the engine. The following example is one such query, but you can use any query that causes the engine to overload. + + 1. In the **Develop Space**, run the following example query **in two separate tabs simultaneously**. + The following code example calculates the maximum product of `a.x` and `b.y` after casting them to `BIGINT`, and the total count of joined rows from two generated series of numbers ranging from 1 to 1,000,000: + + ``` + SELECT MAX(a.x::bigint * b.y::bigint), COUNT(*) + FROM GENERATE_SERIES(1, 1000000) AS a(x) + JOIN GENERATE_SERIES(1, 1000000) AS b(y) ON TRUE; + ``` + 2. After about a minute, enter the code example in step 1 in a new tab. The query should return the numbers of `CLUSTERS` as `2` as shown in the following table: + + clusters min\_clusters max\_clusters 2 1 2 + 3. Stop the engine to stop resource consumption. These queries can run for a very long time and prevent the engine from stopping automatically. 
The following code example stops an engine without waiting for running queries to finish: + + ``` + STOP ENGINE your_engine WITH TERMINATE=true + ``` + +If you are using Firebolt in preview mode, you can only use a single cluster for your engines. If you want to try using multi-cluster engines, contact [Firebolt support](mailto:support@firebolt.io). Additionally, when scaling an engine, both the old and new compute resources may be active at the same time for a period. This simultaneous operation can result in higher consumption of Firebolt Units ([FBUs](/Overview/engine-consumption.html)). + +## [](#automatically-start-or-stop-an-engine)Automatically start or stop an engine + +You can configure an engine to start automatically after creation and to stop after a set idle time. + +### [](#configure-automatic-startstop-using-the-ui)Configure automatic start/stop using the UI + +1. In the **Create new engine** menu, open **Advanced Settings**. +2. Disable **Start engine immediately** to prevent the engine from starting upon creation. + ![](/assets/images/Engine_Initially_Stopped.png) +3. To configure automatic stopping, enable **Automatically stop engine** and set your idle timeout. The default is 20 minutes. Toggle the button off to disable auto-stop. + ![](/assets/images/Engine_Auto_Stop.png) + +### [](#configure-automatic-startstop-using-the-api)Configure automatic start/stop using the API + +Use the [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html) command to set auto-start and auto-stop options: + +``` +CREATE ENGINE my_prod_engine WITH +INITIALLY_STOPPED = true AUTO_STOP = 10; +``` + +The previous example creates an engine that remains stopped after creation and auto-stops after 10 minutes of inactivity. 
+ +To modify the auto-stop feature later, use the [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html) command: + +``` +ALTER ENGINE my_prod_engine SET AUTO_STOP = 30; +``` + +The `INITIALLY_STOPPED` function can only be set during engine creation and cannot be modified afterward. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_query_data.md b/cmd/docs-scrapper/fireboltdocs/guides_query_data.md new file mode 100644 index 0000000..c6f0c8f --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_query_data.md @@ -0,0 +1,10 @@ +# [](#query-data)Query data + +Querying data in Firebolt is designed to be fast, flexible, and efficient, allowing you to extract insights from large datasets. Firebolt supports interactive querying through the **Develop Workspace** for hands-on exploration and offers an API for programmatic access, making it easy to integrate queries into automated workflows. + +Firebolt provides the following approaches for querying data: + +- The [Develop Workspace](/Guides/query-data/using-the-develop-workspace.html) – An intuitive, web-based user interface for writing, running, and refining SQL queries. It simplifies the query development process with features like syntax highlighting, instant query results, and result visualization. Designed for interactive exploration and analysis, the **Develop Workspace** can help you work efficiently with large datasets, troubleshoot queries, and fine-tune performance in a single integrated environment. +- [Drivers](/Guides/developing-with-firebolt/) – Libraries and SDKs that enable you to connect to Firebolt databases from your applications, scripts, and tools. Firebolt provides drivers for popular programming languages like Python, .NET, and Java, allowing you to interact with Firebolt databases programmatically and integrate them into your applications. 
+- [Connectors](/Guides/integrations/integrations.html) – Pre-built integrations that enable you to connect Firebolt to third-party tools, data sources, and services. Firebolt offers connectors for popular data tools like Tableau, Looker, and dbt, allowing you to seamlessly query Firebolt databases from your preferred analytics platforms. +- The [Firebolt API](/Guides/query-data/using-the-api.html) – A REST API that provides access to Firebolt databases. Allows you to create a custom integration with Firebolt, in case none of the available drivers or connectors meet your requirements. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_api.md b/cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_api.md new file mode 100644 index 0000000..d46ae34 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_api.md @@ -0,0 +1,121 @@ +# [](#firebolt-api)Firebolt API + +Use the Firebolt REST API to execute queries on engines programmatically. Learn how to use the API, including authentication, working with engines and executing queries. A service account is required to access the API. Learn about [managing programmatic access to Firebolt](/Guides/managing-your-organization/service-accounts.html). 
+ +- [Firebolt API](#firebolt-api) + + - [Create a service account and associate it with a user](#create-a-service-account-and-associate-it-with-a-user) + - [Use tokens for authentication](#use-tokens-for-authentication) + - [Get the system engine URL](#get-the-system-engine-url) + - [Execute a query on the system engine](#execute-a-query-on-the-system-engine) + - [Get a user engine URL](#get-a-user-engine-url) + - [Execute a query on a user engine](#execute-a-query-on-a-user-engine) + +## [](#create-a-service-account-and-associate-it-with-a-user)Create a service account and associate it with a user + +Create a service account with organization administrator privilege, i.e., the service account property *is\_organization\_admin* must be *true*. Next, create a user with the role privileges you would like the service account to have, and associate the service account with the user. + +## [](#use-tokens-for-authentication)Use tokens for authentication + +To authenticate to Firebolt via its REST API using a service account with the properties described above, send the following request to receive an authentication token: + +``` +curl -X POST --location 'https://id.app.firebolt.io/oauth/token' \ +--header 'Content-Type: application/x-www-form-urlencoded' \ +--data-urlencode 'grant_type=client_credentials' \ +--data-urlencode 'audience=https://api.firebolt.io' \ +--data-urlencode "client_id=${service_account_id}" \ +--data-urlencode "client_secret=${service_account_secret}" +``` + +where: + +Property Data type Description client\_id TEXT The service [account ID](/Guides/managing-your-organization/service-accounts.html#get-a-service-account-id). client\_secret TEXT The service [account secret](/Guides/managing-your-organization/service-accounts.html#generate-a-secret). 
+ +**Response** + +``` +{ + "access_token":"access_token_value", + "token_type":"Bearer", + "expires_in":86400 +} +``` + +In the previous example response, the following apply: + +- The `access_token` is a unique token that authorizes your API requests and acts as a temporary key to access resources or perform actions. You can use this token to authenticate with Firebolt’s platform until it expires. +- The `token_type` is `Bearer`, which means that the access token must be included in an authorization header of your API requests using the format: `Authorization: Bearer <access token>`. +- The token `expires_in` indicates the number of seconds until the token expires. + +Use the returned access\_token to authenticate with Firebolt. + +To run a query using the API, you must first obtain the URL of the engine you want to run on. Queries can be run against any engine in the account, including the system engine. + +## [](#get-the-system-engine-url)Get the system engine URL + +Use the following endpoint to return the system engine URL for `<account name>`. 
+ +``` +curl https://api.app.firebolt.io/web/v3/account/<account name>/engineUrl \ +-H 'Accept: application/json' \ +-H 'Authorization: Bearer <access token>' +``` + +**Example:** `https://api.app.firebolt.io/web/v3/account/my-account/engineUrl` + +**Response** + +``` +{ + "engineUrl":"<account name>.api.us-east-1.app.firebolt.io" +} +``` + +## [](#execute-a-query-on-the-system-engine)Execute a query on the system engine + +Use the following endpoint to run a query on the system engine: + +``` +curl --location 'https://<system engine URL>' \ +--header 'Authorization: Bearer <access token>' \ +--data '<SQL query>' +``` + +where: + +Property Data type Description system engine URL TEXT The system engine URL ([retrieved here](#get-the-system-engine-url)) SQL query TEXT Any valid SQL query (optional) database name TEXT The database name + +## [](#get-a-user-engine-url)Get a user engine URL + +Get a user engine URL by running the following query against the `information_schema.engines` table: + +``` +SELECT url +FROM information_schema.engines +WHERE engine_name='<your engine name>' +``` + +You can run the query on the system engine using the API with the following request: + +``` +curl --location 'https://<system engine URL>/query' \ +--header 'Authorization: Bearer <access token>' \ +--data 'SELECT * FROM information_schema.engines WHERE engine_name='\''my_engine'\''' +``` + +## [](#execute-a-query-on-a-user-engine)Execute a query on a user engine + +Use the following endpoint to run a query on a user engine: + +``` +curl --location 'https://<user engine URL>?database=<database name>' \ +--header 'Authorization: Bearer <access token>' \ +--data '<SQL query>' +``` + +where: + +Property Data type Description user engine URL TEXT The user engine URL ([retrieved here](#get-a-user-engine-url)) database name TEXT The database to run the query SQL query TEXT Any valid SQL query + +Queries are run one per request. To run a multi-statement script, send each statement in a separate request. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_develop_workspace.md b/cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_develop_workspace.md new file mode 100644 index 0000000..a61949a --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_query_data_using_the_develop_workspace.md @@ -0,0 +1,157 @@ +# [](#use-the-develop-space)Use the Develop Space + +- [Open the Develop Space](#open-the-develop-space) +- [A quick tour](#a-quick-tour) +- [Using the document editor](#using-the-document-editor) + + - [Using auto-complete](#using-auto-complete) + - [Using script templates](#using-script-templates) + - [Using the CREATE EXTERNAL TABLE template to import data](#using-the-create-external-table-template-to-import-data) +- [Managing scripts](#managing-scripts) +- [Running scripts and working with results](#running-scripts-and-working-with-results) + + - [Viewing results](#viewing-results) + - [Viewing multi-statement script results](#viewing-multi-statement-script-results) + - [Exporting results to a local hard drive](#exporting-results-to-a-local-hard-drive) +- [Switching between light and dark mode](#switching-between-light-and-dark-mode) +- [Keyboard shortcuts for the Develop Space](#keyboard-shortcuts-for-the-develop-space) + + - [Query operations](#query-operations) + - [Script management](#script-management) + - [Search functionality](#search-functionality) + - [Editing text](#editing-text) + +The **Firebolt Workspace** has a **Develop Space** that you use to edit and run SQL scripts and view query results. + +## [](#open-the-develop-space)Open the Develop Space + +You can launch the space for a database by clicking the **Develop** icon from the left navigation pane or clicking the “+” icon next to “Script 1”. 
+ +![drawing](../../assets/images/develop_workspace_ex0.png) + +![drawing](../../assets/images/develop_workspace_ex4.png) + +**Starting the Develop Space for the last database you worked with** + +1. Choose the **</>** icon from the left navigation pane. + + ![drawing](../../assets/images/develop_workspace_ex0.png) + + The space for the database that you last worked with will open, and the database will be selected from the list. +2. To switch to different database’s space, choose from the dropdown menu in the Databases panel. + +## [](#a-quick-tour)A quick tour + +The **Develop Space** is organized into two panels. + +- The left panel is the explore panel. You can use it to navigate to different databases and to work with different scripts in your database. +- The center panel is the document editor. You can use it to edit scripts, save them, and run scripts. When you run a script, the results will be shown in the bottom part of the pane. + + ![drawing](../../assets/images/develop_workspace_ex5.png) + +## [](#using-the-document-editor)Using the document editor + +The document editor uses tabs to help you organize your SQL scripts. You can switch tabs to work with different scripts and run them. You can have multiple query statements on the same tab. Each statement must be terminated by a semi-colon (`;`). + +### [](#using-auto-complete)Using auto-complete + +As you enter your code in a script tab, Firebolt suggests keywords and object names from the chosen database. Press the tab key to add the first suggestion in the list to your script, or use arrow keys to select a different item from the list and then press the tab key. + +### [](#using-script-templates)Using script templates + +Script templates are available for common tasks, such as creating fact or dimension tables. Place the cursor in the editor where you want to insert code, choose the **</+** icon, and then select a query template from the list. 
+ +### [](#using-the-create-external-table-template-to-import-data)Using the CREATE EXTERNAL TABLE template to import data + +To create an external table, which is the first step for ingesting data into Firebolt, choose the **Import Data** button from the object pane or choose the download icon and then choose **Import data** as shown in the example below. + +Firebolt creates a new tab with a `CREATE EXTERNAL TABLE` statement. + +## [](#managing-scripts)Managing scripts + +- [To rename a script](#scriptrename) +- [To copy a script](#scriptcopy) +- [To export a script and download it as a .sql file](#scriptexport) + +**Renaming a script**[]() + +- Choose the vertical ellipses next to the script name in the left pane, choose **Rename script**, type a new name, and then press ENTER. + +**Copying a script**[]() + +- Choose the vertical ellipses next to the script name in the left pane, choose **Duplicate script**, and then press ENTER. Firebolt saves a new script with the pattern `<script name>_copy`. + +**Exporting a script and downloading it as a .sql file**[]() + +- Choose the vertical ellipses next to the script name in the left pane, and then choose **Export script**. + + Firebolt downloads the file to your browser’s default download directory using the file pattern `<script name>.sql`. + +## [](#running-scripts-and-working-with-results)Running scripts and working with results + +At the bottom of each script tab, you can choose **Run** to execute SQL statements. SQL statements can only run on running engines. If an engine isn’t running, you can select it from the list and then choose the **Start** button for that engine. For more information about engines, see [Operate engines](/Guides/operate-engines/operate-engines.html). + +You can run all statements in a script or select snippets of SQL to run. + +**Running all SQL statements in a script** + +- Position the cursor anywhere in the script editor and then choose **Run**. 
All SQL statements must be terminated by a semi-colon (`;`) or an error occurs. + +**Running a snippet of SQL as a statement** + +- Select the SQL code you want to run as a statement and then choose **Run**. Behind the scenes, Firebolt automatically appends a semi-colon to the selected SQL code so it can run as a statement. + +### [](#viewing-results)Viewing results + +After you run a script or query statement, more results appear below the script editor, along with statistics about query execution. The statistics section will provide further information on your statement such as its status, duration, and more. + +![drawing](../../assets/images/develop_workspace_ex1.png) + +### [](#viewing-multi-statement-script-results)Viewing multi-statement script results + +When you run a script that has multiple SQL statements with result sets (`SELECT` statements), each result is shown on a separate line with statistics about statement execution. The first statement that ran is numbered 1 and at the bottom of the list. + +To view the results table for a result set, choose the table icon as shown in the example below. + +![drawing](../../assets/images/develop_workspace_ex6.png) + +### [](#exporting-results-to-a-local-hard-drive)Exporting results to a local hard drive + +You can export up to 10,000 rows of query results to your local hard drive after you run a query. + +1. Choose the download icon (see image below). +2. Choose **Export table as CSV** or **Export table as JSON**. + Firebolt downloads the file type that you chose to the default download location for your browser. + +It is possible to export the results of a single query alongside the results summary of all queries run in your script (with the statistics). + +## [](#switching-between-light-and-dark-mode)Switching between light and dark mode + +Click on the toggle at the bottom of the left navigation pane to switch between light and dark mode. 
+ +![drawing](../../assets/images/develop_workspace_ex7.png) + +## [](#keyboard-shortcuts-for-the-develop-space)Keyboard shortcuts for the Develop Space + +- [Query operations](#query-operations) +- [Script management](#script-management) +- [Search functionality](#search-functionality) +- [Editing text](#editing-text) + +**Tip:** Use the **Keyboard shortcuts panel** (`Ctrl + Shift + ?`) to quickly view available shortcuts directly within the Develop Space. + +### [](#query-operations)Query operations + +Function Windows & Linux Shortcut Mac Shortcut **Run** the **currently selected query**. Ctrl + Enter ⌘ + Enter **Run all** queries in the current script. Ctrl + Shift + Enter ⌘ + Shift + Enter **Toggle** expanding or collapsing **query results**. Ctrl + Alt + E ⌘ + Option + E + +### [](#script-management)Script management + +Function Windows & Linux Shortcut Mac Shortcut **Create** a new script. Ctrl + Alt + N ⌘ + Option + N **Jump** to a **previous** script. Ctrl + Alt + \[ ⌘ + Option + \[ **Jump** to the **next** script. Ctrl + Alt + ] ⌘ + Option + ] **Close** the **current** script. Ctrl + Alt + X ⌘ + Option + X **Close all** scripts. Ctrl + Alt + G ⌘ + Option + G **Close all but** the **current** script. Ctrl + Alt + O ⌘ + Option + O + +### [](#search-functionality)Search functionality + +Function Windows & Linux Shortcut Mac Shortcut **Open** a **search** panel. Ctrl + F ⌘ + F **Find** the **next search result**. F3 F3 **Find** the **previous search result**. Shift + F3 Shift + F3 + +### [](#editing-text)Editing text + +Function Windows & Linux Shortcut Mac Shortcut **Toggle** adding or removing a **comment marker** for the current line. Ctrl + / Cmd + / **Toggle** adding or removing a **block comment marker** around a block of code or text. Shift + Alt + A Shift + Option + A **Automatically organize and indent** code for readability. Ctrl + Alt + F ⌘ + Option + F **Copy** the selected lines and paste them directly **above** the original. 
Alt + Shift + Up arrow Shift + Option + Up arrow **Move** the selected lines and paste them directly **above** the original without creating a duplicate. Alt + Up arrow Option + Up arrow **Copy** the selected lines and paste them directly **below** the original. Alt + Shift + Down arrow Shift + Option + Down arrow **Move** the selected lines and paste them directly **below** the original without creating a duplicate. Alt + Down arrow Option + Down arrow **Select text** to the **left** of the cursor. Alt + Shift + Left arrow Ctrl + Shift + Left arrow **Select text** to the **right** of the cursor. Alt + Shift + Right arrow Ctrl + Shift + Right arrow **Select** the **entire line**. Alt + L Ctrl + L **Decrease** the **indentation level** of the current or selected lines. Ctrl + \[ Cmd + \[ **Increase** the **indentation level** of the current or selected lines. Ctrl + ] Cmd + ] **Delete** the current or selected **lines**. Shift + Ctrl + K Shift + Cmd + K \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security.md b/cmd/docs-scrapper/fireboltdocs/guides_security.md new file mode 100644 index 0000000..79183de --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security.md @@ -0,0 +1,12 @@ +# [](#configure-security)Configure security + +Learn how to reduce potential attack surface with Firebolt’s security features, including multi-factor authentication and single-sign-on (SSO) integration, role-based access control (RBAC) and network policies to secure data assets in data warehouses. 
+ +* * * + +- [Configure SSO](/Guides/security/sso/) +- [Role-based access control (RBAC)](/Guides/security/rbac.html) +- [Network policies](/Guides/security/network-policies.html) +- [Multi-factor authentication](/Guides/security/enabling-mfa.html) +- [Ownership](/Guides/security/ownership.html) +- [AWS PrivateLink](/Guides/security/privatelink.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_enabling_mfa.md b/cmd/docs-scrapper/fireboltdocs/guides_security_enabling_mfa.md new file mode 100644 index 0000000..4ce62a0 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_enabling_mfa.md @@ -0,0 +1,36 @@ +# [](#enable-multi-factor-authentication-mfa)Enable multi-factor authentication (MFA) + +Enable multi-factor authentication (MFA) as an additional layer of security to protect data that is accessible through Firebolt. With MFA enabled, users must authenticate with a one-time code generated by their mobile device upon login. MFA can be enabled per login. + +Enabling MFA for a login requires the org\_admin role. + +## [](#enable-mfa-for-a-login)Enable MFA for a login + +### [](#sql)SQL + +To enable MFA for a login using SQL, use the [ALTER LOGIN](/sql_reference/commands/access-control/alter-login.html) statement. For example: + +``` +ALTER LOGIN "alex@acme.com" SET IS_MFA_ENABLED = TRUE; +``` + +Multi-factor authentication can also be set for new logins, with the [CREATE LOGIN](/sql_reference/commands/access-control/create-login.html) command. For example: + +``` +CREATE LOGIN "betsy@acme.com" SET IS_MFA_ENABLED = TRUE; +``` + +### [](#ui)UI + +To enable MFA for a login in the UI: + +![Configure > MFA](/assets/images/mfamanagement.png) + +1. Click **Configure** to open the configure space, then choose **Logins** from the menu. +2. Search for the relevant login using the top search filters or by scrolling through the logins list. Toggle on **Is MFA enabled**. 
+ +![Is MFA enabled](/assets/images/mfaenabled.png) + +MFA can also be enabled when creating a login, by toggling **Is MFA enabled** in the **Create login** window: + +![Enable MFA](../../assets/images/mfaloginenabled.png) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_network_policies.md b/cmd/docs-scrapper/fireboltdocs/guides_security_network_policies.md new file mode 100644 index 0000000..16b0fae --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_network_policies.md @@ -0,0 +1,101 @@ +# [](#manage-network-policies)Manage network policies + +By default, Firebolt accepts traffic from any IP address. As an additional layer of security, you can configure individual Firebolt logins or service accounts so their traffic must originate only from the IP addresses that you specify. For each configuration (network policy), you specify a list of IP addresses from which traffic is allowed (the allow list) and a list of IP addresses from which traffic is denied (the blocked list). A network policy is a collection of allowed and blocked lists of IP addresses. + +Network policies can be configured on the organization level and also per login or service account. When evaluating a network policy, Firebolt validates the login/service account IP addresses first by the policy set at organization level. If there is no network policy on the organization level (or the organization-level network policy does not allow access), then the network policy is validated at the login/service account level. If a network policy does not allow access, the user will receive a `401 Unauthorized` response. + +The IP allow and blocked lists used to specify a network policy are specified as comma-separated IPv4 addresses and/or IPv4 address ranges in CIDR format. You can apply the same list to one or many users, and each user can have unique lists. 
You can specify lists manually or import lists of addresses and ranges from a CSV file saved locally. You can add, edit or delete network policies using SQL or in the UI. + +To view all network policies, click **Configure** to open the configure space, then choose **Network policies** from the menu, or query the [information\_schema.network\_policies](/sql_reference/information-schema/network_policies.html) view. + +Managing network policies requires the org\_admin role. + +## [](#create-a-network-policy)Create a network policy + +### [](#sql)SQL + +To create a network policy using SQL, use the [CREATE NETWORK POLICY](/sql_reference/commands/access-control/create-network-policy.html) statement. For example: + +``` +CREATE NETWORK POLICY my_network_policy WITH ALLOWED_IP_LIST = ('4.5.6.1', '2.4.5.1') DESCRIPTION = 'my new network policy' +``` + +### [](#ui)UI + +To create a network policy via the UI: + +![Configure > Network policies](/assets/images/networkpoliciespage.png) + +1. Click **Configure** to open the configure space, then choose **Network policies** from the menu. +2. From the Network policies management page, choose **Create a new network policy**. +3. Enter a network policy name. Optionally, enter a network policy description. To add to the allow list, enter comma-separated IPv4 addresses, or IPv4 address ranges in CIDR format under **Grant access from selected allowed IP addresses**, or choose **import file** to read IP addresses from a CSV file. +4. Enter addresses for the block list in the **Deny access from selected blocked IP addresses** field. +5. Choose **Save**. + +For each user, the Allowed IPs and Blocked IPs are updated to reflect the total number of IP addresses from each list that you specified for that user. Network policies created in the UI are automatically attached to the organization to which the policy creator is logged in. 
+ +## [](#attach-a-network-policy-to-an-organization)Attach a network policy to an organization + +### [](#sql-1)SQL + +When a network policy is created in the UI, it is automatically attached to the organization the creator is logged in to. However, to attach (or detach) a network policy, you can use the command [ALTER ORGANIZATION](/sql_reference/commands/data-definition/alter-organization.html). For example: + +``` +ALTER ORGANIZATION my_organization SET NETWORK_POLICY = my_network_policy +``` + +or to detach: + +``` +ALTER ORGANIZATION my_organization SET NETWORK_POLICY = DEFAULT +``` + +### [](#ui-1)UI + +To attach/detach a network policy to an organization via the UI: + +![Configure > Network policies](/assets/images/networkpoliciespagetoggle.png) + +1. Click **Configure** to open the configure space, then choose **Network policies** from the menu. +2. Search for the relevant network policy using the top search filters or by scrolling through the list. +3. Switch the **Is organizational** toggle to on or off. + +## [](#edit-a-network-policy)Edit a network policy + +### [](#sql-2)SQL + +To edit a network policy using SQL, use the [ALTER NETWORK POLICY](/sql_reference/commands/access-control/alter-network-policy.html) statement. For example: + +``` +ALTER NETWORK POLICY my_network_policy SET ALLOWED_IP_LIST = ('4.5.6.7', '2.4.5.7') BLOCKED_IP_LIST = ('6.7.8.9') DESCRIPTION = 'updated network policy' +``` + +### [](#ui-2)UI + +To edit a network policy via the UI: + +1. Click **Configure** to open the configure space, then choose **Network policies** from the menu. +2. Search for the relevant network policy using the top search filters or by scrolling through the list. Hover over the right-most column to make the network policy menu appear, then choose **Edit network policy**. +3. From here you can edit the description and the allowed and blocked IP addresses, then choose **Save**. 
+ +![Edit network policy](../../assets/images/editnetworkpolicy.png) + +## [](#delete-a-network-policy)Delete a network policy + +### [](#sql-3)SQL + +To delete a network policy using SQL, use the [DROP NETWORK POLICY](/sql_reference/commands/access-control/drop-network-policy.html) statement. For example: + +``` +DROP NETWORK POLICY my_network_policy [ RESTRICT | CASCADE ] +``` + +### [](#ui-3)UI + +To delete a network policy via the UI: + +1. Click **Configure** to open the configure space, then choose **Network policies** from the menu. +2. Search for the relevant network policy using the top search filters or by scrolling through the list. Hover over the right-most column to make the network policy menu appear, then choose **Delete network policy**. You will need to confirm that you will also be removing links to the network policy by choosing **Remove the linkage to logins, service accounts, or to the entire organization** +3. Choose **Confirm**. + +![Delete network policy](../../assets/images/deletenetworkpolicy.png) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_ownership.md b/cmd/docs-scrapper/fireboltdocs/guides_security_ownership.md new file mode 100644 index 0000000..22bb4c7 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_ownership.md @@ -0,0 +1,77 @@ +# [](#ownership)Ownership + +Ownership allows users to perform all operations on any object they created without having to grant privileges for these operations manually. This provides a smoother user experience because objects are immediately available to use as they are created. These operations include granting privileges on owned objects. 
+ +## [](#supported-object-types)Supported object types + +The object types that support ownership are: + +- Role +- User +- Engine +- Database +- Schema +- Table +- View + +The current owner of an object can be viewed in the corresponding information\_schema view: + +Object View Role N/A User [information\_schema.users](/sql_reference/information-schema/users.html) Database [information\_schema.catalogs](/sql_reference/information-schema/catalogs.html) Engine [information\_schema.engines](/sql_reference/information-schema/engines.html) Schema [information\_schema.schemata](/sql_reference/information-schema/schemata.html) Table [information\_schema.tables](/sql_reference/information-schema/tables.html) View [information\_schema.views](/sql_reference/information-schema/views.html) or [information\_schema.tables](/sql_reference/information-schema/tables.html) + +Index ownership, shown in [information\_schema.indexes](/sql_reference/information-schema/indexes.html), will always show the table owner as an index’s owner. + +## [](#changing-an-objects-owner)Changing an object’s owner + +The owner of an object may alter its ownership using the following syntax: + +``` +ALTER OWNER TO +``` + +Examples: + +``` +ALTER DATABASE db OWNER TO new_owner +ALTER ENGINE eng OWNER TO new_owner +ALTER ROLE r OWNER TO new_owner +ALTER USER u OWNER TO new_owner +ALTER SCHEMA public OWNER TO new_owner +ALTER TABLE t OWNER TO new_owner +ALTER VIEW v OWNER TO new_owner +``` + +## [](#dropping-users-that-own-objects)Dropping users that own objects + +Any objects owned by a user must first be dropped or have their owner changed before dropping the user. + +A table owner can drop the table even if there are views referencing it that are not owned by the table’s owner, using the `CASCADE` parameter to [DROP TABLE](/sql_reference/commands/data-definition/drop-table.html). 
+ +## [](#transfer-ownership-using-the-firebolt-workspace)Transfer ownership using the Firebolt Workspace + +You can use the user interface in the **Firebolt Workspace** to transfer ownership of objects as follows: + +1. Log in to the [Firebolt Workspace](https://firebolt.go.firebolt.io/signup). If you don’t yet have an account with Firebolt, you can sign up for one. +2. Select the Govern icon (![The icon to open the Govern Space.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +3. Select **Ownership** from the left navigation pane. +4. Select the three horizontal dots (…) to the right of the object that you want to transfer ownership of. +5. Select **Transfer ownership** from the drop-down list. +6. In the **Transfer ownership** window that opens, choose a new owner from the drop-down list. +7. Select the **Transfer ownership** button to confirm. + +##### [](#viewing-all-objects-owned-by-a-user)Viewing all objects owned by a user + +1. From the [Firebolt Workspace](https://firebolt.go.firebolt.io/signup), select the Govern icon (![The icon to open the Govern Space.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select **Users** from the left navigation pane. +3. Select the user from the **User Name** column. +4. Select the **Ownership** tab to view a list of objects owned by the selected user. + +##### [](#bulk-transferring-or-deleting-objects-owned-by-a-user)Bulk transferring or deleting objects owned by a user + +1. From the [Firebolt Workspace](https://firebolt.go.firebolt.io/signup), select the Govern icon (![The icon to open the Govern Space.](../../assets/images/govern-icon.png)) in the left navigation pane to open the **Govern Space**. +2. Select **Users** from the left navigation pane. +3. Select the three horizontal dots (…) to the right of the user whose objects you want to transfer ownership of. +4. 
Select **Transfer ownership** from the drop-down list. +5. In the window that opens, select the checkboxes next to objects that you want to delete or transfer ownership of. +6. Select the **Delete object** or **Transfer ownership** button to apply changes. + +Ownership transfer using the **Firebolt Workspace** is not available for `Schema`, `Table`, and `View` objects. These must be modified using SQL commands in the **Develop Workspace** or using the [Firebolt API](/API-reference/). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_privatelink.md b/cmd/docs-scrapper/fireboltdocs/guides_security_privatelink.md new file mode 100644 index 0000000..f056b6c --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_privatelink.md @@ -0,0 +1,117 @@ +# [](#aws-privatelink)AWS PrivateLink + +Firebolt supports AWS PrivateLink to help you securely access the Firebolt API without exposing traffic to the public internet. AWS PrivateLink enhances security, minimizes data exposure, and improves network reliability by keeping traffic within AWS. This guide shows you how to: + +- [Request AWS PrivateLink access](#request-aws-privatelink-access) +- [Configure your VPC endpoint](#configure-your-vpc-endpoint) +- [Configure your service account](#configure-your-service-account) +- [Test your AWS PrivateLink connection](#test-your-aws-privatelink-connection) + +AWS PrivateLink for Firebolt is in public preview and available in all Firebolt regions. + +## [](#prerequisites)Prerequisites + +Before setting up AWS PrivateLink, ensure you have the following: + +- An **AWS account** with permissions to create a [VPC interface endpoint](https://docs.aws.amazon.com/vpc/latest/privatelink/create-interface-endpoint.html). If you don’t have access, you can [sign up](https://signin.aws.amazon.com/signup) for an AWS account. +- A **Firebolt Account**: Ensure that you have access to an active Firebolt account. 
If you don’t have access, you can [sign up for an account](https://www.firebolt.io/sign-up). For more information about how to register with Firebolt, see [Get started with Firebolt](/Guides/getting-started/). +- **Service Account**: You must have access to an active Firebolt [service account](/Guides/managing-your-organization/service-accounts.html), which facilitates programmatic access to Firebolt. Your service account must be associated with a [user](/Overview/organizations-accounts.html#users) with privileges associated with an [account administrative role](/Overview/organizations-accounts.html#account-administrative-role) or an [organizational administrative role](/Overview/organizations-accounts.html#organizational-administrative-role) to request AWS PrivateLink access. + +## [](#request-aws-privatelink-access)Request AWS PrivateLink access + +After meeting the previous prerequisites, request AWS PrivateLink access using Firebolt’s user interface as follows: + +1. [Log in](https://go.firebolt.io/signup) to the **Firebolt Workspace**. +2. Select the **Help** icon (?) at the bottom of the left navigation pane. +3. Select **Request PrivateLink**. +4. In the pop-up window, enter the following information: + + 1. **Organization Name** – The name of your [organization](/Overview/organizations-accounts.html#organizations) in Firebolt. + 2. **Account** – The Firebolt [account](/Overview/organizations-accounts.html#accounts) associated with a [role](/Overview/organizations-accounts.html#roles) with sufficient permission to request AWS PrivateLink. These include the [account administrative](/Overview/organizations-accounts.html#account-administrative-role) or [organizational administrative roles](/Overview/organizations-accounts.html#organizational-administrative-role). + 3. **AWS Account IDs** – The AWS account IDs for which you want to create a PrivateLink integration. 
For additional information about permissions, see [Manage permissions](https://docs.aws.amazon.com/vpc/latest/privatelink/configure-endpoint-service.html#add-remove-permissions). +5. After you submit the AWS PrivateLink request, Firebolt’s support team will review it, provision a dedicated VPC endpoint in your Account’s AWS Region, and send an email to the requestor containing the `Endpoint URL` and `endpointServiceId`. Save this information for configuration. + +### [](#configure-your-vpc-endpoint)Configure your VPC endpoint + +After you have requested AWS PrivateLink on Firebolt’s user interface, login to AWS and configure a VPC endpoint as follows: + +01. Sign in to the [AWS Management Console](https://aws.amazon.com/console/). +02. In the search bar at the top, enter **VPC**. +03. Select **VPC (Virtual Private Cloud)** from the dropdown list. +04. In the left navigation pane under **VPC Dashboard**, expand **PrivateLink and Lattice**. +05. Select **Endpoints**. +06. In the upper right corner, select **Create endpoint**. +07. In the **Create endpoint** pane, enter an optional **Name tag** to identify your endpoint. +08. Select the radio button next to **Endpoint services that use NLBs and GWLBs**. +09. In the **Service settings** pane, in the text box under **Service name**, enter the `endpointServiceId` provided in the email from Firebolt’s support team in the previous step to [request AWS PrivateLink access](#request-aws-privatelink-access). +10. Select **Verify service** to confirm that your AWS PrivateLink access is configured correctly. and select Other endpoint services. +11. In the **Network settings** pane, select the down arrow to select your autopopulated **VPC** from the dropdown list. +12. Select the checkbox **Enable DNS NAME**. +13. In the **Subnets** pane select the checkbox next to the subnets that match the **Availability Zone** where your resources reside in your AWS Region. +14. 
Select the down arrow under **Subnet ID** and hoose the appropriate Subnet ID for your VPC. +15. In the **Security groups** pane, select the checkbox next to the **Group ID** of your security group. Your security group should allow inbound traffic on **port 443** in order to interact with the Firebolt API. +16. Select the **Create endpoint** button in the bottom-right corner of the main workspace. +17. After the endpoint is created, ensure that your security groups and route tables are correctly configured to allow traffic to the endpoint, so that your intended workloads can access Firebolt over the Private API endpoint. Use the following code example to validate your connection to Firebolt by sending it from an EC2 instance in your VPC: + +``` +curl -v https://api.app.firebolt.io --resolve api.app.firebolt.io:443: +``` + +In the previous code example, replace with the private IP address of your newly created VPC endpoint network interface. + +### [](#configure-your-service-account)Configure your service account + +Configure your Firebolt [service account](/Guides/managing-your-organization/service-accounts.html) with the `PRIVATE_ONLY` connection preference to ensure it accesses Firebolt only through AWS PrivateLink and uses private APIs. A claim is a setting that defines how a connection behaves. The `PRIVATE_ONLY` claim enforces private networking by restricting access to public endpoints. + +Use the following code example inside the **Develop Workspace** in the **Firebolt Workspace**: + +``` +CREATE SERVICE ACCOUNT IF NOT EXISTS "test_sa" +WITH CONNECTION_PREFERENCE = PRIVATE_ONLY; +``` + +### [](#test-your-aws-privatelink-connection)Test your AWS PrivateLink connection + +After configuring your VPC to use the Firebolt AWS PrivateLink endpoint, test connectivity using the endpoint URL provided by the Firebolt support team. 
+ +Use the following curl command to retrieve the private endpoint from your account: + +``` +curl https://api.go.firebolt.io/web/v3/account/developer/engineUrl \ +-H 'Accept: application/json' \ +-H "Authorization: Bearer $TOKEN" +{ + "engineUrl": "01hnj9r1xrx3a4t3kb1ec7qs2b.api-private.us-east-1.app.firebolt.io" +} +``` + +If your service account has the `PRIVATE_ONLY` claim, requests from that service account to any Firebolt public endpoint will fail. + +When using the `PRIVATE_ONLY` claim, requests to the private endpoint complete successfully if the traffic originates from an authorized AWS VPC endpoint with the necessary route tables, security group rules, and network access control lists to enable communication. + +The following code example sends a `SELECT 42` query to a Firebolt private API endpoint using `curl`, authenticates with a bearer token, and returns a JSON response containing the query result: + +``` +curl --location 'https://01hnj3r1xrx3a4t3kb1ec7qs2b.api-private.us-east-1.app.firebolt.io' \ +--header "Authorization: Bearer $TOKEN" \ +--data 'SELECT 42' + +{ + "meta": [ + { + "name": "?column?", + "type": "int" + } + ], + "data": [ + { + "?column?": 42 + } + ], + "rows": 1, + "statistics": { + "elapsed": 0.014256, + "rows_read": 1, + "bytes_read": 1 +} +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_rbac.md b/cmd/docs-scrapper/fireboltdocs/guides_security_rbac.md new file mode 100644 index 0000000..17fd2ea --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_rbac.md @@ -0,0 +1,194 @@ +# [](#manage-role-based-access-control)Manage role-based access control + +Role-Based Access Control (RBAC) allows you to manage user permissions by controlling who can access or perform operations on specific objects in Firebolt. This guide provides a step-by-step process for setting RBAC in Firebolt. 
+ +## [](#prerequisites)Prerequisites + +The following material can help you understand key concepts related to organizations and RBAC in Firebolt: + +- [Organizations and accounts](/Overview/organizations-accounts.html) – How Firebolt provides a structure for managing users, resources, and permissions. +- [Role-Based Access Control](/Overview/Security/Role-Based%20Access%20Control/) – How administrators manage user permissions and control access to resources based on predefined roles. + +## [](#view-all-roles)View all roles + +To view all roles using the **Firebolt Workspace**, do the following: + +1. Login to the [Firebolt Workspace](https://firebolt.go.firebolt.io/signup). +2. Select the **Govern** icon (![The Firebolt Govern Space icon.](../../assets/images/govern-icon.png))from the left navigation bar to open the **Govern Space**. +3. Choose **Roles** from the left panel under **Govern**. + +To view all roles using SQL, query the [information\_schema.applicable\_roles](/sql_reference/information-schema/applicable-roles.html) view as shown in the following code example: + +``` +SELECT + * +FROM + information_schema.applicable_roles; +``` + +## [](#create-a-role)Create a role + +You can create a role using the **Firebolt Workspace** user interface (UI) or using SQL. + +### [](#create-a-role-using-sql)Create a role using SQL + +The following code example uses [CREATE ROLE](/sql_reference/commands/access-control/create-role.html) to create the role `user_role`: + +``` +CREATE ROLE user_role; +``` + +### [](#create-a-role-using-the-ui)Create a role using the UI + +To create a custom role using the UI: + +1. Select the **Govern** icon (![The Firebolt Govern Space icon.](../../assets/images/govern-icon.png)) from the left navigation bar to open the **Govern Space**. +2. Choose **Roles** from the left panel under **Govern**. +3. Choose the **+ New Role** button in the upper-right corner of the page. +4. Under **Create role**, enter a role name. +5. 
Under **Role privileges:**, select the object type that you want to grant permissions for. You can choose either **Databases** or **Engines**. +6. Configure permissions for the role: + + - Toggle the buttons under **Databases privileges** or **Engine privileges** to grant permissions to **create** or **modify** an objects across all databases or engines. If you want to apply permissions to a specific engine or database, select it from the table under the toggle buttons. + - Define permissions more granularly using table views. + +## [](#delete-a-role)Delete a role + +You can delete a role using either the UI in the **Govern Workspace** or using SQL. + +### [](#delete-a-role-using-sql)Delete a role using SQL + +To delete a role using SQL, use [DROP ROLE](/sql_reference/commands/access-control/drop-role.html) as shown in the following code example: + +``` +DROP ROLE user_role; +``` + +### [](#delete-a-role-using-the-ui)Delete a role using the UI + +To delete a role via the UI: + +1. Select the **Govern** icon (![The Firebolt Govern Space icon.](../../assets/images/govern-icon.png)) from the left navigation bar to open the **Govern Space**. +2. Choose **Roles** from the left panel under **Govern**. +3. Search for the relevant role using the top search filters or by scrolling through the list. Hover over the right-most column to make the role menu appear, then choose **Delete role**. +4. Choose **Confirm**. + +## [](#grant-permissions-to-a-role)Grant permissions to a role + +### [](#grant-permissions-using-sql)Grant permissions using SQL + +To grant a permission to a role using SQL, use [GRANT](/sql_reference/commands/access-control/grant.html) as shown in the following code example: + +``` +GRANT USAGE ON DATABASE my_db TO user_role; +``` + +### [](#grant-permissions-using-the-ui)Grant permissions using the UI + +To grant a permission to a role via the UI: + +1. Select **Govern** to open the govern space, then choose **Roles** from the menu: +2. 
Search for the relevant role either by using the search filters at the top of the page, or by scrolling through the list of roles. Hover over the right-most column to make the role menu appear, then choose **Edit role**.
+ +### [](#revoke-a-role-using-sql)Revoke a role using SQL + +To revoke a role from a user or another role using SQL, use the [REVOKE ROLE](/sql_reference/commands/access-control/revoke.html) statement. For example: + +``` +REVOKE ROLE user_role FROM USER alex; +``` + +### [](#revoke-a-role-using-the-ui)Revoke a role using the UI + +To revoke a role, follow the steps in [Grant a role to users](#grant-a-role-to-users). + +### [](#check-assigned-privileges-using-sql)Check assigned privileges using SQL + +To check the effective privileges for the current user, run the following example query: + +``` +SELECT + AR.grantee, + AR.role_name, + OP.privilege_type, + OP.object_type, + OP.object_name +FROM information_schema.transitive_applicable_roles AS AR +JOIN information_schema.object_privileges AS OP +ON (AR.role_name = OP.grantee) +WHERE + AR.grantee = session_user(); +``` + +**Returns**: + +grantee role\_name privilege\_type object\_type object\_name test\_user account\_admin USAGE engine engine1 test\_user account\_admin USAGE database db1 + +#### [](#owner-rights)Owner rights + +When a query is run on a view, the database checks and uses the permissions of the view’s owner to access the underlying objects that view references, rather than the permissions of the user that ran the query on the view. The view’s owner is the user that created the view. 
+ +The following code example shows how granting and revoking privileges affects access to a base table and its view, ultimately causing an authorization failure when the view’s owner loses schema usage privileges: + +``` +CREATE USER user1 WITH ROLE=role1; +CREATE USER user2 WITH ROLE=role2; + +CREATE TABLE base_table (a int); -- executed by user1 +CREATE VIEW view_over_base_table AS SELECT * FROM base_table; -- executed by user1 + +GRANT SELECT ON VIEW view_over_base_table TO role2; +REVOKE SELECT ON TABLE base_table FROM role2; + +SELECT * FROM base_table; -- executed by user2, fails with an authorization error +SELECT * FROM view_over_base_table; -- executed by user2, successfully + +REVOKE USAGE ON SCHEMA public FROM role1; +-- role1 no longer has no access to the table due to missing schema usage privileges +SELECT * FROM view_over_base_table; -- executed by user2 and fails because the view owner's role1 cannot access table t +``` + +If the view owner’s privileges are revoked, the query will fail even if the user has access to the view. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_sso.md b/cmd/docs-scrapper/fireboltdocs/guides_security_sso.md new file mode 100644 index 0000000..fe7f871 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_sso.md @@ -0,0 +1,28 @@ +# [](#configure-your-idp)Configure your IdP + +An **Identity Provider (IdP)** is a service that handles user authentication and manages user identities. When you set up Single Sign-On (SSO), the IdP verifies your users’ credentials and allows them to access multiple applications, including Firebolt, without needing to login repeatedly. + +For your organization, an IdP simplifies user management and strengthens security. You can enforce centralized security policies, like multi-factor authentication (MFA), and quickly revoke access when someone leaves the team. 
+ +For your users, using single sign-on gives them access to all the tools they need, including Firebolt. + +Single-sign on (SSO) is an authentication process that allows access to multiple applications or services with a single set of credentials. It provides a centralized authentication mechanism for your organization so that it’s easier to manage user access, enforce security policies, and revoke access when necessary. + +## [](#pre-requisites)Pre-requisites + +Before you can use SSO with Firebolt, you must complete specific configuration steps in your Identity Provider (IdP) system, which is responsible for authenticating users and managing their credentials. Part of these steps include defining an **Audience URI**, which specifies the intended recipient of a SAML assertion about a user’s authentication. The configuration of an Audience URI depends on your IdP. See the following list of supported IdPs for specific instructions. + +If your Audience URI is not configured correctly, Security Assertion Markup Language (SAML) assertions used for authentication will fail, preventing users from signing in using SSO. + +## [](#supported-idps)Supported IdPs + +Firebolt allows you to sign in using federated identities. The SSO implementation supports the following IdPs: + +- [Auth0](/Guides/security/sso/auth0.html) +- [Okta](/Guides/security/sso/okta.html) +- [OneLogin](/Guides/security/sso/onelogin.html) +- [Salesforce](/Guides/security/sso/salesforce.html) +- [PingFederate (Ping Identity)](/Guides/security/sso/pingfederate.html) +- [Custom Identity provider](/Guides/security/sso/custom-sso.html) + +If your IdP is not listed but supports SAML2.0, contact the [Firebolt support team](mailto:support@firebolt.io). 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_sso_auth0.md b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_auth0.md new file mode 100644 index 0000000..7785b24 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_auth0.md @@ -0,0 +1,99 @@ +# [](#auth0)Auth0 + +[Auth0](https://auth0.com/) is an identity management platform that provides authentication and authorization services for applications. Auth0 supports implementing secure login systems with authentication methods including single-sign on (SS0). + +To integrate Auth0 with Firebolt’s platform, you need to configure both an [Auth0 application for Firebolt](#configure-an-auth0-application) and [Firebolt’s SSO for Auth0](#configure-firebolt-for-auth0). Detailed instructions can be found in the following sections: + +## [](#configure-an-auth0-application)Configure an Auth0 application + +01. Login to your Auth0 Dashboard. If you don’t yet have an account with Auth0, you can [sign up](https://auth0.com/signup) to access their services. +02. Select **Applications** from the left navigation panel. +03. Select **Applications** again. +04. Select the **+ Create Application** button. +05. Under **Name**, enter a name for your application. +06. In the dropdown list under **Application Type**, select **Regular Web Application**. +07. Select **Create the Application**. +08. Once your application is created, it will appear under **Applications**. Select the three horizontal dots (…) next to your application’s name, and select **Settings** from the dropdown list. +09. Navigate to the **Application URIs** section. +10. In the textbox under **Application Login URI**, enter your Firebolt organization URL address, followed by `/login?`. For example, `https://staging-go.firebolt.io/login?`. +11. 
In the textbox under **Allowed Callback URLs**, provide a callback URL with the following format: `https://id.app.firebolt.io/login/callback?connection=<org-name>-<provider>&organization=<organization identifier>`.
Select the **Configure** icon (![The Firebolt Configure Space icon.](../../../assets/images/configure-icon.png)). +3. Select **SSO** from the left navigation pane. +4. Under **Configure SSO for your organization**, enter the following: + + 1. **Sign-on URL** - Enter the sign-on URL, provided by the SAML identity provider, where Firebolt will send SAML requests. The URL is specific to the IdP and is defined during configuration. For Auth0, this value corresponds to the Identity Provider Login URL value copied in **Step 15** of the [Auth0 application configuration](#configure-an-auth0-application). + 2. **Issuer** - A unique value generated by the SAML identity provider specifying the issuer value. The issuer corresponds to the **Issuer** value noted in **Step 15** of the [Auth0 application configuration](#configure-an-auth0-application). + 3. **Provider** - The provider’s name, `Auth0`. + 4. **Label**: The label to use for the SSO login button. You can use any label name. If the label is not provided, Firebolt uses the value in the **Provider** field. + 5. (Optional) **Sign-out URL** - An endpoint provided by Auth0 that facilitates the logout process by redirecting the user to this URL, ending their session. + 6. **Signing certificate** - A digital certificate used to verify the authenticity of a signature used to communication between Auth0 and Firebolt. The certificate must be in Privacy Enhanced Mail (PEM) or CER format, and can be uploaded from your computer by selecting **Import certificate** or entered in the text box under **Signing certificate**. + 7. **Field mapping** - A mapping used to match user attributes between Auth0 and Firebolt. Enter the **First name** and **Last name** in your Auth0 profile. Mapping is only required the first time a user logs in using SSO. + 8. Select **Update changes**. 
+ +### [](#configure-firebolt-to-integrate-with-auth0-using-sql)Configure Firebolt to integrate with Auth0 using SQL + +Login to Firebolt’s [Workspace](https://go.firebolt.io/login). If you haven’t yet registered with Firebolt, see [Get Started](/Guides/getting-started/). If you encounter any issues, reach out to [support@firebolt.io](mailto:support@firebolt.io) for help. Then, do the following: + +1. Select the Develop icon (![The Firebolt Develop Space icon.](../../../assets/images/develop-icon.png)). +2. By default, when you login to **Firebolt’s Workspace** for the first time, Firebolt creates a tab in the **Develop Space** called **Script 1**. The following apply: + + + +- The database that Script 1 will run using is located directly below the tab name. If you want to change the database, select another database from the drop-down list. +- An engine must be running to process the script in a selected tab. The name and status of the engine that Script 1 uses for computation is located to the right of the current selected database. + +Select system from the drop-down arrow next to the engine name. The system engine is always running, and you can use it to create a service account. You can also use an engine that you create. + +1. Use the syntax in the following example code to create an SSO connection in the **SQL Script Editor**: + +``` +ALTER ORGANIZATION vsko SET SSO = '{ + "signOnUrl": "https://dev-1234567890123456.us.auth0.com/samlp/123456789012345678901234567890123", + "signoutURL": "http://your-sign-out-URL", + "issuer": "auth0", + "provider": "auth0", + "label": "Auth0 Company IdP", + "fieldMapping": { + "given_name": "name", + "family_name": "surname" + }, + "certificate": "" +}'; +``` + +In the previous code example, the following apply: + +- `signOnUrl`- The sign-on URL, provided by the SAML identity provider, where Firebolt will send SAML requests. The URL is specific to the IdP and is defined during configuration. 
For Auth0, this value corresponds to the Identity Provider Login URL value copied in **Step 15** of the [Auth0 application configuration](#configure-an-auth0-application). +- (Optional)`signoutUrl`- An endpoint provided by Auth0 that facilitates the logout process by redirecting the user to this URL, ending their session. +- `issuer` - A unique value generated by the SAML identity provider specifying the issuer value. The issuer corresponds to the **Issuer** value noted in **Step 15** of the [Auth0 application configuration](#configure-an-auth0-application). +- `provider` - The provider’s name, `Auth0`. +- `label` - The label to use for the SSO login button. You can use any label name. If the label is not provided, Firebolt uses the value in the **Provider** field. +- `certificate` - A digital certificate used to verify the authenticity of a signature used to communication between Auth0 and Firebolt. The certificate must be in Privacy Enhanced Mail (PEM) or CER format. +- `field mapping` - A mapping used to match user attributes between Auth0 and Firebolt. Enter the first name and surname in your Auth0 profile. Mapping is only required the first time a user logs in using SSO. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_sso_custom_sso.md b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_custom_sso.md new file mode 100644 index 0000000..7637f79 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_custom_sso.md @@ -0,0 +1,161 @@ +# [](#custom-identity-provider)Custom Identity Provider + +A custom Identity Provider (IdP) allows you to use your organization’s existing authentication system for secure access to Firebolt using Single Sign-On (SSO). By configuring a custom IdP, you ensure that your team can securely and efficiently access Firebolt resources using familiar credentials. 
+ +To integrate a custom IdP with Firebolt’s platform, you need to [configure your IdP for Firebolt](#configure-custom-identity-provider-idp) and [Firebolt’s SSO for your custom IdP](#configure-firebolt-for-custom-idp). Detailed instructions can be found in the following sections: + +## [](#configure-custom-identity-provider-idp)Configure Custom Identity Provider (IdP) + +In order to set up a SAML 2.0 compliant service or application as your Identity Provider (IdP) for Single Sign-On (SSO) with Firebolt, follow these steps: + +1. **Define a custom SHA-256** + + In the service/application interface, define a custom SHA-256 application specifically for Firebolt. Follow the service or application’s instructions to create this custom application. +2. **Create Users in the Service/Application** + + For each end-user that needs access to Firebolt: + + - Create a user in the service/application interface. + - Ensure that each user’s email address is correctly specified. Firebolt uses these email addresses to create corresponding logins in Firebolt. For more details, refer to [setting up SSO](/Guides/security/sso/). +3. **Obtain required values for IdP setup** + + To properly configure your IdP, you’ll need to configure the **Audience URI and ACS (Assertion Consumer Service) URL**, which are crucial for successful SSO authentication. If not configured properly, authentication will fail. + + The **Audience URI** ensures that authentication requests are directed to the correct Firebolt tenant, and has the following format: `urn:auth0:app-firebolt-v2:-`. + + The **ACS URL** is where the IdP sends authentication responses after login, and has the following format: `https://id.app.firebolt.io/login/callback?connection=-&organization=`. + + In the previous example formats, the following apply: + + - **``** : The organizational name used to create your Firebolt account, as seen in your vanity URL. 
- **`<provider>`** : The provider being configured as your IdP
+   - **`<organization identifier>`** : A unique identifier for your organization. To retrieve this value, navigate to **Configure > SSO** in the Firebolt UI and select **Copy organization SSO identifier**.
+- `Provider`: The name of your identity provider, such as `JumpCloud`. + If you are using a SAML 2.0-compliant service or application as your IdP, select the **Custom** label. +- `Label`: The text displayed on the SSO login button. If left blank, the value from the **Provider** field will be used. +- `Certificate`: The certificate used to verify communication between the identity provider and Firebolt. It must be in PEM or CER format. You can upload it using the **Import certificate** button or paste it directly into the provided text box. +- `Sign-out URL`: The URL provided by the application owner to redirect users when they sign out. +- `Field mapping`: Mapping to your identity provider’s first and last name in key-value pairs. If additional fields are required, choose **Add another key-value pair**. Mapping is required for Firebolt to fill in the login’s given and last names the first time the user logs in using SSO. If this field remains empty when a login that represents the user is being created (read more in the [log in using SSO](#log-in-using-sso) section), the login’s first and last name fields will contain “NA”. Those fields can be updated later by running the [ALTER LOGIN](/sql_reference/commands/access-control/alter-login.html) command. Here’s an example of how to set up **Field mapping**: + + ``` + { + "given_name": "name", + "family_name": "surname" + } + ``` + + In this example: + + - given\_name (first name) is mapped to the `name` field from the IdP. + - family\_name (last name) is mapped to the `surname` field from the IdP. + + + +1. Select **Update changes**. 
+ +### [](#integrate-with-idp-using-sql)Integrate with IdP using SQL + +To create your SSO connection in Firebolt, you can use the following SQL as an example: + +``` +ALTER ORGANIZATION vsko SET SSO = '{ + "signOnUrl": "https://dev-a8jnpkgk4y7gylt5.us.auth0.com/samlp/9aLOXgDHcqxW1gWtBuWNxVLbKNyv1LQV", + "issuer": "okta", + "provider": "okta", + "label": "Okta Company IdP", + "fieldMapping": { + "given_name": "name", + "family_name": "surname" + }, + "certificate": "-----BEGIN CERTIFICATE-----SampleCertificate-----END CERTIFICATE-----" +}'; +``` + +Make sure that the certificate value is provided as one string, without any line breaks or control characters such as `\r\n`. + +## [](#log-in-using-sso)Log in using SSO + +1. Visit [go.firebolt.io/login](https://go.firebolt.io/login). +2. Enter your organization name and select **Continue to login**. If you don’t remember your organization name, select **Find out** next to **Don’t know your organization name?**. +3. Enter the email address you use for Firebolt and select **Send link**. +4. Check your inbox for an email containing a direct login link for your organization. Bookmark this link for future use. +5. Select \*\*Login with \*\*. You’ll be redirected to your identity provider (IdP) for authentication. Once authenticated, you’ll return to Firebolt. + +**During Login** + +- **New Users**: + If a login with your email doesn’t already exist, Firebolt will create one based on the email, first name, and last name provided in the SAML assertion from the IdP. + The new login will be SSO-only, with the `IS_PASSWORD_ENABLED` property set to `False`. +- **Existing Users**: + If the login already exists and **Field Mapping** is set: + + - If your first or last name differs from what’s specified in the IdP, Firebolt will update those fields. + - If the names match or Field Mapping is empty, the existing fields will remain unchanged and it will authenticate as usual. 
+
+## [](#edit-sso-settings)Edit SSO settings
+
+SSO settings can be edited in two ways - using SQL or the UI. To edit SSO settings using SQL, use the [ALTER ORGANIZATION](/sql_reference/commands/data-definition/alter-organization.html) statement. For example:
+
+```
+ALTER ORGANIZATION vsko SET SSO = '{
+  "signOnUrl": "https://abc.okta.com/app/okta_firebolt_app_id/sso/saml",
+  "signOutUrl": "https://myapp.exampleco.com/saml/logout",
+  "issuer": "issuer",
+  "provider": "Okta",
+  "label": "Okta",
+  "fieldMapping": "mapping",
+  "certificate": "-----BEGIN CERTIFICATE-----SampleCertificate-----END CERTIFICATE-----"
+}';
+```
+
+To edit SSO settings using the UI, see [Configure Firebolt to integrate with IdP using the UI](#integrate-with-idp-using-the-ui).
+
+## [](#delete-sso)Delete SSO
+
+To disable SSO login, you can delete the SSO settings using either SQL or the UI. To modify SSO settings using SQL, use the following command:
+
+```
+ALTER ORGANIZATION vsko SET SSO = DEFAULT;
+```
+
+To modify SSO settings using the UI:
+
+1. Select **Configure** to open the **Configure Space**, then choose **SSO**.
+2. Select **Clear SSO configuration**.
+3. Select **Update changes**.
+
+After the SSO configuration is deleted:
+
+- Users who were created through SSO will remain in your organization but will no longer be able to log in to Firebolt unless password-based login is enabled for them. You can enable this using the [ALTER LOGIN](/sql_reference/commands/access-control/alter-login.html) command.
+- All logins with `is_sso_provisioned=true` will automatically be updated to `is_sso_provisioned=false`.
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_sso_okta.md b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_okta.md new file mode 100644 index 0000000..ac62053 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_okta.md @@ -0,0 +1,95 @@ +# [](#okta)Okta + +Okta is an identity and access management platform that enables secure Single Sign-On (SSO). Integrating Okta with Firebolt allows users to login using their existing credentials, simplifying access while maintaining security. This setup helps you and your team centralize authentication and manage user access. + +To integrate Okta with Firebolt’s platform, you need to [configure your Okta application for Firebolt](#configure-okta-application) and [Firebolt’s SSO for Okta](#configure-okta-application). Detailed instructions can be found in the following sections: + +## [](#configure-okta-application)Configure Okta application + +01. In the Okta Admin Console, select **Applications** from the left navigation panel. +02. Select **Applications** again. +03. Select **Create a new app integration**. +04. Select **SAML 2.0** as the sign-in method and choose **Next**. +05. Fill out **General Settings** and navigate to the next section **Configure SAML**. +06. Complete the following fields: + + - **Single sign-on URL.** This URL has the following format `https://id.app.firebolt.io/login/callback?connection=-&organization=` + + - **``** represents the organization name used to create your Firebolt Account. This org name is referenced in your vanity URL. + - **``** represents the provider we’re configuring as our IdP. + - **``** - the unique identifier for your organization in Firebolt. To retrieve your **``** , do the following: + + 1. Login to the [Firebolt Workspace](https://go.firebolt.io/signup). + 2. Select the **Configure** icon (![The Firebolt Configure Space icon.](../../../assets/images/configure-icon.png)). + 3. 
Select **SSO** from the left navigation pane. + 4. Select **Copy organization SSO identifier**. + + **Example:** https://id.app.firebolt.io/login/callback?connection=vsko-okta&organization=org\_82u3nzTNQPA8RyoM + - **Audience URI (SP Entity ID).** This URI has the following format: `urn:auth0::-`, where `` is app-firebolt-v2, `` is the name of organization provider and `` is the provider value set in Firebolt configuration step. + + **Example:** `urn:auth0:app-firebolt-v2:vsko-okta` +07. Save the configuration. +08. Open the **Sign On** tab of your created app integration, and select the **SAML 2.0** tab. Select **More details** to expand additional information. +09. Copy or note down the value for **Sign on URL** and **Issuer**. +10. Download the **Signing Certificate**. + +## [](#configure-firebolt-to-integrate-with-okta)Configure Firebolt to integrate with Okta + +Once your Identity Provider(IdP) is configured, you can now configure Firebolt to integrate with Okta either using SQL scripts in the **Develop Space** or through the user interface (UI) in the **Configure Space**. + +### [](#configure-firebolt-to-integrate-with-okta-using-the-ui)Configure Firebolt to integrate with Okta using the UI + +1. Login to the [Firebolt Workspace](https://go.firebolt.io/signup). +2. Select the **Configure** icon (![The Firebolt Configure Space icon.](../../../assets/images/configure-icon.png)). +3. Select **SSO** from the left navigation pane. +4. Under **Configure SSO for your organization**, enter the following: + + 1. **Sign-on URL** - Enter the sign-on URL, provided by the SAML identity provider, where Firebolt will send SAML requests. The URL is specific to the IdP and is defined during configuration. For Okta, this value corresponds to the Sign on URL value copied in **Step 6**. + 2. **Issuer** - A unique value generated by the SAML identity provider specifying the issuer value. The issuer corresponds to the **Issuer** value noted in **Step 9**. + 3. 
**Provider** - The provider’s name, `Okta`.
+   4. **Label**: The label to use for the SSO login button. You can use any label name. If the label is not provided, Firebolt uses the value in the **Provider** field.
+   5. (Optional) **Sign-out URL** - An endpoint provided by Okta that facilitates the logout process by redirecting the user to this URL, ending their session.
+   6. **Signing certificate** - A digital certificate used to verify the authenticity of a signature used for communication between Okta and Firebolt. The certificate needs to be in PEM or CER format, and can be uploaded from your computer by selecting **Import certificate** or entered in the text box under **Signing certificate**.
+   7. **Field mapping** - A mapping used to match user attributes between Okta and Firebolt. Enter the **First name** and **Last name** in your Okta profile. Mapping is only required the first time a user logs in using SSO.
+   8. Select **Update changes**.
+
+### [](#configure-firebolt-to-integrate-with-okta-using-sql)Configure Firebolt to integrate with Okta using SQL
+
+Login to Firebolt’s [Workspace](https://go.firebolt.io/login). Then, do the following:
+
+1. Select the Develop icon (![The Firebolt Develop Space icon.](../../../assets/images/develop-icon.png)).
+2. By default, when you login to **Firebolt’s Workspace** for the first time, Firebolt creates a tab in the **Develop Space** called **Script 1**. The following apply:
+
+
+
+- The database that Script 1 will run using is located directly below the tab name. If you want to change the database, select another database from the drop-down list.
+- An engine must be running to process the script in a selected tab. The name and status of the engine that Script 1 uses for computation is located to the right of the current selected database.
+
+Select system from the drop-down arrow next to the engine name. The system engine is always running, and you can use it to create a service account. 
You can also use an engine that you create. + +1. Use the syntax in the following example code to create an SSO connection in the **SQL Script Editor**: + +``` +ALTER ORGANIZATION vsko SET SSO = '{ + "signOnUrl": "https://dev-1234567890123456.us.okta.com/samlp/123456789012345678901234567890123", + "signoutURL": "http://your-sign-out-URL", + "issuer": "okta", + "provider": "okta", + "label": "Okta Company IdP", + "fieldMapping": { + "given_name": "name", + "family_name": "surname" + }, + "certificate": "" +}'; +``` + +In the previous code example, the following apply: + +- `signOnUrl`- The sign-on URL, provided by the SAML identity provider, where Firebolt will send SAML requests. The URL is specific to the IdP and is defined during configuration. For Okta, this value corresponds to the **Sign on URL** value copied in **Step 9**. +- (Optional)`signoutUrl`- An endpoint provided by Okta that facilitates the logout process by redirecting the user to this URL, ending their session. +- `issuer` - A unique value generated by the SAML identity provider specifying the issuer value. The issuer corresponds to the **Issuer** value noted in **Step 9**. +- `provider` - The provider’s name, `Okta`. +- `label` - The label to use for the SSO login button. You can use any label name. If the label is not provided, Firebolt uses the value in the **Provider** field. +- `certificate` - A digital certificate used to verify the authenticity of a signature used to communication between Okta and Firebolt. The certificate must be in Privacy Enhanced Mail (PEM) or CER format. +- `field mapping` - A mapping used to match user attributes between Okta and Firebolt. Enter the first name and surname in your Okta profile. Mapping is only required the first time a user logs in using SSO. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_sso_onelogin.md b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_onelogin.md new file mode 100644 index 0000000..9ef47ac --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_onelogin.md @@ -0,0 +1,92 @@ +# [](#onelogin)OneLogin + +OneLogin is a cloud-based identity and access management platform that simplifies secure access to applications through single sign-on (SSO). Integrating OneLogin with Firebolt enhances security and streamlines user authentication for your team. + +To integrate OneLogin with Firebolt’s platform, you need to configure both a [OneLogin application for Firebolt](#configure-onelogin-application) and [Firebolt’s SSO for OneLogin](#configure-firebolt-for-onelogin). Detailed instructions can be found in the following sections: + +## [](#configure-onelogin-application)Configure OneLogin application + +1. In OneLogin, open to the dashboard and select **Applications** and **Add App**. +2. Search for **SAML**, and select **SAML Test Connector (IdP w/attr)**. +3. Change the Display Name of the app and select **Save**. This will be the name of the app that will appear in your OneLogin portal. +4. Open the **SSO** tab and copy the value for the **SAML 2.0 Endpoint (HTTP)**. This value will later be used in the Firebolt Configuration step. Note that logout endpoint is not used at this time. +5. Select the **View Details** link at the X.509 Certificate field and copy/download the certificate. +6. Navigate to the **Configuration** tab and fill in the following values: + + - **Audience** - a URI in the following format: `urn:auth0::-`, where `` is app-firebolt-v2, `` is the name of organization, and `` is the provider value set in Firebolt configuration step. **Example:** `urn:auth0:app-firebolt-v2:vsko2-onelogin` + - **ACS (Consumer) URL Validator** - a valid regular expression. 
This field is used to ensure OneLogin posts the response to the correct URL, and it validates the ACS URL field. + - **ACS (Consumer) URL** - the post-back URL for your organization. This is the URL in the following format: `https://id.app.firebolt.io/login/callback?connection=-&organization=`. The organization\_identifier is needed to select the correct organization during redirects. The authentication flow will fail if this is incorrectly provided or left blank. **Example:** https://id.app.firebolt.io/login/callback?connection=vsko2-onelogin&organization=org\_82u3nzTNQPA8RyoM + + > **``** represents the Organizational name used to create your Firebolt Account. The org name is referenced in your vanity URL. + > **``** represents the provider we’re configuring as our IdP. **``** is the unique identifier for your Organization. To retrieve your **``** , you can navigate to **Configure > SSO** in the Firebolt UI, and **Click Copy organization SSO identifier**. + +## [](#configure-firebolt-for-onelogin)Configure Firebolt for OneLogin + +Once your OneLogin application is configured, you can now configure Firebolt to integrate with OneLogin. This can be done using the Firebolt UI, or with SQL. + +### [](#integrate-with-onelogin-using-the-ui)Integrate with OneLogin using the UI + +1. To configure the Firebolt SSO integration with OneLogin using the UI, Navigate to **Configure > SSO** in Firebolt. +2. Enter the following information: + + - `signOnUrl`: The sign-on URL, provided by the SAML identity provider, to which Firebolt sends the SAML requests. The URL is IdP-specific and is determined by the identity provider during configuration. In Onelogin, this is the **SAML 2.0 Endpoint (HTTP)** value we copied in step 4. + - `signoutUrl(optional)`: The sign-out URL, provided by the application owner, to be used when the user signs out of the application.\`\`\` + - `issuer`: A unique value generated by the SAML identity provider specifying the issuer value. 
+ - `provider`: The provider’s name - for example: `OneLogin`. + - `label`: The label to use for the SSO login button. If not provided, the Provider field value is used. + - `certificate`: The certificate to verify the communication between the identity provider and Firebolt. The certificate needs to be in PEM or CER format, and can be uploaded from your computer by choosing **Import certificate** or entered in the text box. + - `field mapping`: Mapping to your identity provider’s first and last name in key-value pairs. If additional fields are required, choose **Add another key-value pair**. Mapping is required for Firebolt to fill in the login’s given and last names the first time the user logs in using SSO. + + Here’s an example of how to set up **Field mapping**: + + ``` + { + "given_name": "name", + "family_name": "surname" + } + ``` + + In the previous example: + + - given\_name (first name) is mapped to the `name` field from the IdP. + - family\_name (last name) is mapped to the `surname` field from the IdP. +3. Select **Update changes**. 
+ +### [](#integrate-with-idp-using-sql)Integrate with IdP using SQL + +To create your SSO connection in Firebolt, you can use the following SQL as an example: + +``` +ALTER ORGANIZATION vsko SET SSO = '{ + "signOnUrl": "https://vsko-test.onelogin.com/trust/saml2/http-post/sso/aa", + "issuer": "onelogin", + "provider": "onelogin", + "label": "OneLogin Company App", + "fieldMapping": { + "given_name": "name", + "family_name": "surname" + }, + "certificate": "" +}'; +``` + +where + +- `organization_name` is the name of the organization in Firebolt, +- `signOnUrl` is the SAML 2.0 Endpoint (HTTP) value copied during OneLogin setup, +- `issuer` is the name of the issuer, ‘onelogin’ in this case, +- `provider` is the IdP name, ‘onelogin’ in this case, +- `label` is text that will appear on the **Sign in** form (this defaults to `-` if a value is not provided, for instance ‘acme-onelogin\`) +- `certificate` is the X.509 Certificate copied during OneLogin setup, and +- `field_mapping` includes additional fields to be mapped from the SAML assertion, based on what was configured during OneLogin setup. 
For example:
+
+```
+{
+  "given_name": "name",
+  "family_name": "surname"
+}
+```
+
+This corresponds to the following setup in OneLogin, where name/surname in OneLogin corresponds to values in JSON:
+
+![Edit first name](../../../assets/images/onelogexample1.png)
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/guides_security_sso_pingfederate.md b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_pingfederate.md
new file mode 100644
index 0000000..9d0d8c1
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/guides_security_sso_pingfederate.md
@@ -0,0 +1,74 @@
+# [](#pingfederate-ping-identity)PingFederate (Ping Identity)
+
+PingFederate is an identity and access management solution that enables secure single sign-on, identity federation, and authentication across multiple applications and platforms, supporting protocols like SAML, OAuth, and OpenID Connect. You can use PingFederate to enable secure single sign-on (SSO) and federated identity management for authentication across Firebolt.
+
+To integrate PingFederate with Firebolt’s platform, you need to configure both a [PingFederate application for Firebolt](#configure-pingfederate-application) and [Firebolt’s SSO for PingFederate](#configure-firebolt-for-pingfederate). Detailed instructions can be found in the following sections:
+
+#### [](#configure-pingfederate-application)Configure PingFederate application
+
+1. Select **Applications** in the administration menu. Then on the **Applications** page, select **+** to create a new application.
+2. Type in the application name (for instance, Firebolt) and description. Then select **SAML Application**, and select **Configure**.
+3. Choose **Manually Enter** to provide application metadata. 
+ + - Fill the **ACS URLs** field with a URL in the following format: `https://id.app.firebolt.io/login/callback?connection=-&organization=` For example: `https://id.app.firebolt.io/login/callback?connection=acmeorg-pingfederate&organization=org_82u3nzTNQPA8RyoM` + + > **``** represents the Organizational name used to create your Firebolt Account. The org name is referenced in your vanity URL. + > **``** represents the provider we’re configuring as our IdP. **``** is the unique identifier for your Organization. To retrieve your **``** , you can navigate to **Configure > SSO** in the Firebolt UI, and **Click Copy organization SSO identifier**. + - Fill in **Entity ID** with the value: `urn:auth0:app-firebolt-v2:-`, where + - `` is the name of the organization in Firebolt, and + - `` is the provider value set in Firebolt configuration step For example: `urn:auth0:app-firebolt-v2:acmeorg-pingfederate` +4. Select **Save**. +5. From the **Configuration** tab: + + - Download the signing certificate in X509 PEM format + - Save the value of Single SignOn Service +6. Open the **Attribute Mappings** tab, and edit the saml\_subject PingOne Mapping to map to Email Address. Then **Save**. + +#### [](#configure-firebolt-for-pingfederate)Configure Firebolt for PingFederate + +Once your Identity Provider(IdP) is configured, you can now configure Firebolt to integrate with your IdP. This can be done either using the Firebolt UI, or using SQL. + +##### [](#ui)UI + +1. To configure the Firebolt SSO integration with PingFederate using the UI, Navigate to **Configure > SSO** in Firebolt. +2. Once there, enter your Sign-on URL, Issuer, Provider, Label, Certificate, and field-mappings, where + + - `signOnUrl`: The sign-on URL, provided by the SAML identity provider, to which Firebolt sends the SAML requests. The URL is IdP-specific and is determined by the identity provider during configuration. 
+ - `signoutUrl(optional)`: The sign-out URL, provided by the application owner, to be used when the user signs out of the application. In Pingfederate, you can retrieve this value by copying the `Single Logout Service` URL found in **Application > Configuration**.\`\`\` + - `issuer`: A unique value generated by the SAML identity provider specifying the issuer value. + - `provider`: The provider’s name - for example: `PingFederate`. + - `label`: The label to use for the SSO login button. If not provided, the Provider field value is used. + - `certificate`: The certificate to verify the communication between the identity provider and Firebolt. The certificate needs to be in PEM or CER format, and can be uploaded from your computer by choosing **Import certificate** or entered in the text box. + - `field mapping`: Mapping to your identity provider’s first and last name in key-value pairs. If additional fields are required, choose **Add another key-value pair**. Mapping is required for Firebolt to fill in the login’s given and last names the first time the user logs in using SSO. Here’s an example of how to set up field mapping: + + ``` + { + "given_name": "name", + "family_name": "surname" + } + ``` + + In the previous code example, `given_name` is your first name, and is mapped to the “name” field from the IDP. The `family_name` is your last name, and is mapped from the “surname” field. +3. Choose **Update changes**. 
+ +##### [](#sql)SQL + +Values for SQL to create the SSO connection are as follows: + +``` +ALTER ORGANIZATION acmeorg SET SSO = '{ + "signOnUrl": "https://auth.pingone.eu/74d536da-4d98-4fdd-83ae-63af461eb826/saml20/idp/sso", + "issuer": "pingfederate", + "provider": "pingfederate", + "label": "PingFederate Company App", + "certificate": "" +}'; +``` + +where + +- `signOnURL` is the Single Sign On Service URL obtained during PingFederate configuration, +- `issuer` is the name of the issuer, ‘pingfederate’ in this case, +- `provider` is the IdP name, ‘pingfederate’ in this case, +- `label` is the text that will appear on the **Sign in** form (this defaults to `-.my.salesforce.com/idp/endpoint/HttpPost`. Save this value to be used as the SignOnURL in Firebolt SSO configuration. + +1. Select **Download Certificate**, and convert the downloaded .crt file to PEM format. You could do this using the following command: `openssl x509 -in original.crt -out sfcert.pem -outform PEM` where `original.crt` is the name of the downloaded .crt file. +2. Select on the provided link to create a new connected app in Salesforce. +3. You will be redirected to the **Manage Connected Apps / New Connected App** view. Fill in required fields **Connected App Name, API Name** (for instance, type ‘Firebolt’) and **Contact email**. +4. Move to **Web App Settings,** and check the **Enable SAML** box. +5. Fill in the Entity Id field with value: `urn:auth0:app-firebolt-v2:-`, where + + - `` is the name of the organization in Firebolt, and + - `` is the IdP name, ‘salesforce’ in this case For example: `urn:auth0:app-firebolt-v2:acmeorg-salesforce` +6. 
Fill the ACS URL field with a URL in the following format (contact Firebolt to get your organization\_identifier) `https://id.app.firebolt.io/login/callback?connection=-&organization=` For example: `https://id.app.firebolt.io/login/callback?connection=acmeorg-salesforce&organization=org_82u3nzTNQPA8RyoM` + + > **``** represents the Organizational name used to create your Firebolt Account. The org name is referenced in your vanity URL. + > **``** represents the provider we’re configuring as our IdP. **``** is the unique identifier for your Organization. To retrieve your **``** , you can navigate to **Configure > SSO** in the Firebolt UI, and **Click Copy organization SSO identifier**. +7. Keep **Subject Type** as Username, and **Name ID Format** as unspecified. Select **Save**. + +#### [](#configure-firebolt-for-salesforce)Configure Firebolt for SalesForce + +Once your Identity Provider(IdP) is configured, you can now configure Firebolt to integrate with your IdP. This can be done using either the Firebolt UI, or using SQL. + +##### [](#ui)UI + +1. To configure the Firebolt SSO integration with Salesforce using the UI, Navigate to **Configure > SSO** in Firebolt. +2. Once there, enter your Sign-on URL, Issuer, Provider, Label, Certificate, and field-mappings, where + + + +- `signOnUrl`: The sign-on URL, provided by the SAML identity provider, to which Firebolt sends the SAML requests. The URL is IdP-specific and is determined by the identity provider during configuration. +- `signoutUrl(optional)`: The sign-out URL, provided by the application owner, to be used when the user signs out of the application.\`\`\` +- `issuer`: A unique value generated by the SAML identity provider specifying the issuer value. +- `provider`: The provider’s name - for example: `Salesforce`. +- `label`: The label to use for the SSO login button. If not provided, the Provider field value is used. 
+- `certificate`: The certificate to verify the communication between the identity provider and Firebolt. The certificate needs to be in PEM or CER format, and can be uploaded from your computer by choosing **Import certificate** or entered in the text box. +- `field mapping`: Mapping to your identity provider’s first and last name in key-value pairs. If additional fields are required, choose **Add another key-value pair**. Mapping is required for Firebolt to fill in the login’s given and last names the first time the user logs in using SSO. Here’s an example of how to set up field mapping: + + ``` + { + "given_name": "name", + "family_name": "surname" + } + ``` + + In the previous code example, `given_name` is your first name, and is mapped to the “name” field from the IDP. The `family_name` is your last name, and is mapped from the “surname” field. 3. Choose **Update changes** + +##### [](#sql)SQL + +Values for SQL to create the SSO connection are as follows: + +``` +ALTER ORGANIZATION acmeorg SET SSO = '{ + "signOnUrl": "https://firebolttest-dev-ed.my.salesforce.com/idp/endpoint/HttpPost", + "issuer": "salesforce", + "provider": "salesforce", + "label": "Salesforce Company App", + "certificate": "" +}'; +``` + +where + +- `signOnURL` is the SAML 2.0 endpoint value copied during Salesforce setup, +- `issuer` is the name of the issuer, ‘salesforce’ in this case, +- `provider` is the IdP name, ‘salesforce’ in this case, +- `label` is text that will appear on the **Sign in** form (this defaults to ‘-’ if a value is not provided, for instance ‘acme-salesforce\`), and +- `certificate` is the X.509 certificate in PEM format downloaded during setup. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/index.md b/cmd/docs-scrapper/fireboltdocs/index.md new file mode 100644 index 0000000..1e13c9b --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/index.md @@ -0,0 +1,47 @@ +# Firebolt documentation + +Firebolt is a cloud data warehouse engineered ground up for low latency analytics on data sets of all sizes. + +Dive into our documentation to find solutions to common questions through our guides and resources built for your successful development and integration with Firebolt. + +## Top picks + +[**What is Firebolt?** +\ +Find out about Firebolt's product and key benefits.](../product/index.html) + +[**Get started** +\ +Explore our guide on creating a database and engine and loading data.](../Guides/getting-started/index.html) + +[**Load data** +\ +Use a wizard to load data quickly or use SQL scripts for a customized workflow.](../Guides/loading-data/loading-data.html) + +[**Engine fundamentals** +\ +Learn about Firebolt engines and how to use them to ingest data and execute DML queries.](../Overview/engine-fundamentals.html) + +[**Data modeling** +\ +Design your data model and use databases, tables, and indexes to optimize query performance.](../Overview/indexes/using-indexes.html) + +## Documentation resources + +![Overview Icon](../assets/images/overview-icon.svg) + +### Overview + +- [Architecture](../Overview/architecture-overview.html)[Pricing and billing](../Overview/billing/index.html)[Organization and accounts](../Overview/organizations-accounts.html)[Engine fundamentals](../Overview/engine-fundamentals.html)[Data modeling](../Overview/indexes/using-indexes.html)[Data management](../Overview/data-management.html)[Queries](../Overview/queries/index.html)[Security](../Overview/Security/security.html) + +![Guides Icon](../assets/images/guides-icon.png) + +### Guides + +- [Manage organizations](../Guides/managing-your-organization/index.html)[Get 
started](../Guides/getting-started/index.html)[Operate engines](../Guides/operate-engines/operate-engines.html)[Load data](../Guides/loading-data/loading-data.html)[Query data](../Guides/query-data/index.html)[Configure security](../Guides/security/index.html)[Develop with Firebolt](../Guides/developing-with-firebolt/index.html)[Integrate with Firebolt](../Guides/integrations/integrations.html)[Work with semi-structured data](../Guides/loading-data/working-with-semi-structured-data/working-with-semi-structured-data.html)[Export data](../Guides/exporting-data.html) + +![Reference Icon](../assets/images/reference-icon.png) + +### Reference + +- [Release notes](../Reference/release-notes/release-notes.html)[Available regions](../Reference/available-regions.html)[System settings](../Reference/system-settings.html)[Object identifiers](../Reference/object-identifiers.html)[Reserved words](../Reference/reserved-words.html)[Help menu](../Reference/help-menu.html)[SQL reference](../sql_reference/index.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview.md b/cmd/docs-scrapper/fireboltdocs/overview.md new file mode 100644 index 0000000..970f091 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview.md @@ -0,0 +1,14 @@ +# [](#overview)Overview + +Read about Firebolt architecture and how it enables differentiated speed, scale, concurrency and price-performance characteristics. Learn core Firebolt concepts such as Engines, loading data using COPY FROM and Data Management Lifecycle in Firebolt. 
+ +* * * + +- [Architecture](/Overview/architecture-overview.html) +- [Pricing and billing](/Overview/billing/) +- [Organizations and accounts](/Overview/organizations-accounts.html) +- [Engine Fundamentals](/Overview/engine-fundamentals.html) +- [Data modeling](/Overview/indexes/using-indexes.html) +- [Data management](/Overview/data-management.html) +- [Queries](/Overview/queries/) +- [Security](/Overview/Security/security.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_architecture_overview.md b/cmd/docs-scrapper/fireboltdocs/overview_architecture_overview.md new file mode 100644 index 0000000..e51f945 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_architecture_overview.md @@ -0,0 +1,39 @@ +# [](#firebolt-architecture)Firebolt architecture + +The following diagram gives a high-level overview of Firebolt’s structural architecture. Firebolt’s components, which include management, compute, and storage layers, interact with a variety of workloads to enhance performance, scalability, and resource efficiency. + +![Firebolt's architecture has management, compute, and storage layers that work with various workloads.](../../assets/images/fireboltarchitecture.png) + +## [](#management-layer)Management layer + +Firebolt’s **management layer** handles key administrative functions including managing metadata, security settings, and observability, all in one place. Through this layer, administrators can oversee and control user access, permissions, and roles, ensuring robust security. It also provides insights into system performance and resource usage, providing operational visibility. The management layer also streamlines workspace management, enabling seamless organization and monitoring of data environments, further simplifying the overall administration process. + +Firebolt’s management layer consists of the following: + +- **Administration** - Manage access control, user roles, permissions, and system configurations. 
+- **Metadata** - Track and manage data definitions, schema information, and relationships within the database. +- **Security** - Configure and enforce access controls, authentication, encryption, and user permissions to protect data and ensure compliance. +- **Observability** - Monitor system performance, track query performance, and analyze resource utilization so that you can optimize operations and troubleshoot issues. +- **Workspace** - Organize and manage data environments, resources, and project-specific configurations, enabling collaboration and streamlining data workflows. + +## [](#compute-layer)Compute layer + +Firebolt’s **compute layer** is responsible for running queries and processing data through its scalable engines. Engines use parallel processing to deliver high performance and efficiency. You can create and configure multiple engines tailored to different workflows, such as data integration or analytical queries supporting customer facing analytics. Engines can be configured for different needs like query latency, throughput, and concurrency, adapting to specific data processing tasks. + +Firebolt’s compute layer features the following: + +- **Scalable engines** - Firebolt’s compute layer provides scalable engines that process queries in parallel, ensuring high-speed performance and efficiency for diverse workloads. +- **Customizable configurations** - You can tailor engine configurations to match your specific workflow needs. +- **Multiple engines for workload isolation** - You can create multiple engines to handle mixed workloads. Each workload runs in an isolated compute environment, so that you can avoid performance issues caused by noisy neighbors. +- **On-demand resource allocation** - Engines can be started, stopped, or resized based to match current workload demands. 
+ +## [](#storage-layer)Storage layer + +Firebolt’s **storage layer** efficiently manages large amounts of data by keeping storage separate from the compute process, which means that you can store as much data as needed without impacting compute resources. It uses cloud-based storage for high availability and durability, while also reducing costs. Data is stored in a compressed, column-based format to save space and improve query performance. Firebolt’s indexing features further speed up data retrieval by reducing the need to scan large datasets. This separation between storage and compute allows users to scale their storage needs independently, making resource management more flexible and cost-efficient. + +Firebolt’s storage layer features the following: + +- **Separation of storage and compute** - You can scale data storage independently of the computing resources used for processing queries. This separation lets you adjust storage without affecting compute resources, allowing for flexible and efficient resource management. +- **Cloud-based and cost-effective** - Firebolt leverages cloud storage for high availability, durability, and reduced costs. +- **Columnar format** - Data is stored in a compressed, column-based format, optimizing both storage efficiency and query performance. +- **Fast data retrieval with advanced indexing** - Indexing features including primary and aggregating indexes speed up queries by minimizing the need to scan large datasets. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_billing.md b/cmd/docs-scrapper/fireboltdocs/overview_billing.md new file mode 100644 index 0000000..becf0bf --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_billing.md @@ -0,0 +1,164 @@ +# [](#firebolt-pricing-and-deployment-models)Firebolt pricing and deployment models + +Firebolt offers flexible deployment models that provide scalability and cost efficiency to meet diverse business needs. 
Choose between fully-managed and self-managed solutions tailored to your infrastructure and workload requirements. + +![Firebolt offers fully-managed and self-managed deployment models](../../assets/images/deployment-models.png) + +## [](#choose-your-deployment-model)Choose your deployment model + +Firebolt offers two deployment models: **fully-managed** and **self-managed**. Each model includes specific editions and pricing options. + +- The [fully-managed](#fully-managed-editions-and-pricing) deployment option includes **Standard**, **Enterprise**, and **Dedicated** editions with Pay-As-You-Go or Committed-Use discount pricing +- The [self-managed](#self-managed-editions-and-pricing) deployment model includes Firebolt’s **Firebolt Core** and **Private Cloud**, with pricing based on your infrastructure setup. + +All fully-managed and **Private Cloud** editions have associated [support plans and service-level agreements](#support-plans-and-service-level-agreements) that define a time to first response. + +The following sections outline the available deployment models, their editions, and associated pricing options. + +### [](#fully-managed-editions-and-pricing)Fully managed editions and pricing + +Firebolt manages compute infrastructure, software maintenance, and upgrades across all fully-managed editions. Firebolt handles multi-dimensional elasticity, high performance, scaling, maintenance, and performance optimizations. + +- [Fully-managed editions](#fully-managed-editions) – Firebolt offers the **Standard**, **Enterprise** and **Dedicated** editions. +- [Fully-managed pricing model](#fully-managed-pricing-model) – Your total price consists of a storage and a compute cost. + + - [Data storage pricing](#data-storage-pricing) – The storage portion of your total cost. + - [Compute usage pricing](#compute-usage-pricing) – The compute portion of your total cost. +- [Fully-managed pricing plans](#fully-managed-pricing-plans) – How to pay for resource usage. 
+ + - [Billing setup and monitoring](#billing-setup-and-monitoring) – How to use the Billing dashboard to view resource consumption and set up billing and a plan. + - [Set up billing for fully-managed plans](#set-up-billing-for-fully-managed-plans) – How to set up pay-as-you-go billing through AWS Marketplace. + - [Sign up or change your fully-managed edition](#sign-up-or-change-your-fully-managed-edition) – How to set up or change your fully-managed edition type. + +#### [](#fully-managed-editions)Fully-managed editions + +There are three editions available: **Standard**, **Enterprise**, and **Dedicated**. + +![Firebolt offers the Standard, Enterprise, and Dedicated editions with different features.](../../assets/images/firebolt-fully-managed.png) + +**Standard edition** +The **Standard** edition is best for teams seeking a low-cost entry point without the need to manage compute infrastructure. It offers high performance with sub-second query latency, flexible compute scaling within a single cluster. Security features include Role-Based Access Control (RBAC), single sign-on (SSO), multi-factor authentication (MFA), and audit logging. **Standard** includes a flexible compute option that is optimized for either storage or for compute. Firebolt manages your compute infrastructure, software maintenance, and upgrades. + +**Enterprise edition** +The **Enterprise** edition is designed for organizations that need advanced security, compliance, and automatic compute scaling. It includes all the features in the **Standard** edition, plus multi-cluster scaling, auto-scaling for concurrency, [AWS PrivateLink](/Guides/security/privatelink.html), and HIPAA compliance support. + +**Dedicated edition** +The **Dedicated** edition is ideal for organizations that require high levels of security and isolation, such as those operating in government clouds. 
It includes all the features in the **Enterprise** edition, plus complete data isolation on a single-tenant infrastructure, offering the highest level of security. + +### [](#fully-managed-pricing-model)Fully-managed pricing model + +The total cost for Firebolt’s fully-managed editions consists of a **cost for data storage** plus a **cost for compute usage**. The compute usage cost depends on the type of engine that you select: compute-optimized or storage-optimized. + +![Total cost consists of data storage and compute usage with two types of compute options.](../../assets/images/compute-usage-cost.png) + +#### [](#data-storage-pricing)Data storage pricing + +Data storage costs are based on the amount of compressed data stored, including indexes and raw data. Pricing is based on Amazon S3 costs in your selected AWS region. There are no additional storage charges beyond S3 costs. + +Available AWS Regions Price per TB/month ($USD) US East (N. Virginia) $23.00 US West (Oregon) $23.00 Europe (Frankfurt) $24.50 Europe (Ireland) $23.00 Asia Pacific (Singapore) $25.00 + +#### [](#compute-usage-pricing)Compute usage pricing + +Compute costs are measured in Firebolt Units (FBUs) and vary based on [engine](/Overview/engine-fundamentals.html) node type, number of nodes or cluster size, the number of clusters and usage duration. Costs are only billed for the time Firebolt engines are running. Firebolt offers two compute family options: + +- **Storage-optimized** (default): High SSD capacity for caching and production workloads. +- **Compute-optimized**: About 2x cheaper; ideal for development and test environments or workloads with smaller active datasets. 
+ +The following table outlines the available node types, their compute family, and the corresponding FBU sizing: + +Node type Compute family Sizing in FBU Small (S) Storage-optimized 8 Medium (M) Storage-optimized 16 Large (L) Storage-optimized 32 Extra Large (XL) Storage-optimized 64 Small (S) Compute-optimized 4 Medium (M) Compute-optimized 8 Large (L) Compute-optimized 16 Extra Large (XL) Compute-optimized 32 + +Firebolt’s compute usage pricing is based on **FBUs**, which vary by node type and region. The following table provides hourly pricing for **storage-optimized** node types across **Standard** and **Enterprise** plans, distinguishing between **US and Non-US regions**. Compute costs are only incurred while Firebolt engines are running, with **per-second billing**. + +**Storage-optimized pricing for compute usage** + +Node Type Sizing in FBU Standard: US Region Pricing ($0.23/FBU/hr) Enterprise: US Region Pricing ($0.35/FBU/hr) Standard: Non-US Region Pricing ($0.28/FBU/hr) Enterprise: Non-US Region Pricing ($0.42/FBU/hr) **Small (S)** 8 $1.84 $2.80 $2.24 $3.36 **Medium (M)** 16 $3.68 $5.60 $4.48 $6.72 **Large (L)** 32 $7.36 $11.20 $8.96 $13.44 **Extra Large (XL)** 64 $14.72 $22.40 $17.92 $26.88 + +The following table provides hourly pricing for **compute-optimized** node types across **Standard** and **Enterprise** plans, distinguishing between **US and Non-US regions**. Compute costs are only incurred while Firebolt engines are running, with **per-second billing**. 
+ +**Compute-optimized pricing for compute usage** + +Node Type Sizing in FBU Standard: US Region Pricing ($0.23/FBU/hr) Enterprise: US Region Pricing ($0.35/FBU/hr) Standard: Non-US Region Pricing ($0.28/FBU/hr) Enterprise: Non-US Region Pricing ($0.42/FBU/hr) **Small (S)** 4 $0.92 $1.40 $1.12 $1.68 **Medium (M)** 8 $1.84 $2.80 $2.24 $3.36 **Large (L)** 16 $3.68 $5.60 $4.48 $6.72 **Extra Large (XL)** 32 $7.36 $11.20 $8.96 $13.44 + +### [](#fully-managed-pricing-plans)Fully-managed pricing plans + +Firebolt offers two pricing options for the **Standard** and **Enterprise** editions: + +- **Pay-as-you-go**: A flexible plan that provides on-demand pricing with no upfront cost or commitment. This plan is ideal for startups or teams with unpredictable workloads. Customers get billed monthly based on actual usage and only pay for what they use with per-second billing. +- **Committed-use discounts**: A consumption model that provides discounted rates against a prepaid usage commitment. This plan is ideal for organizations with consistent, high-volume workloads and results in lower total costs compared to the pay-as-you-go plan. Once all prepaid FBU credits are consumed, your plan switches to the pay-as-you-go pricing model. Customers can always continue using Firebolt, with consumption either drawing from prepaid credits or transitioning to the pay-as-you-go model when credits run out. + +Contact [support@firebolt.io](mailto:support@firebolt.io) to discuss a committed-use plan, annual pricing commitments, or **Dedicated** edition pricing. + +#### [](#billing-setup-and-monitoring)Billing setup and monitoring + +You can use Firebolt’s billing dashboard to monitor resource consumption, track expenses, monitor payments, and analyze billing trends efficiently. To view the billing dashboard, follow steps 1-3 in the following section to [set up billing for fully-managed plans](#set-up-billing-for-fully-managed-plans). 
+ +Billing invoices are generated on a monthly basis, and provide a detailed breakdown of resource consumption and associated costs. + +#### [](#set-up-billing-for-fully-managed-plans)Set up billing for fully-managed plans + +**Pay-As-You-Go setup via AWS Marketplace:** + +1. Log in to [Firebolt’s Workspace](https://go.firebolt.io/login). If you haven’t yet registered with Firebolt, see the [Get Started](/Guides/getting-started/) guide. +2. In the Firebolt Workspace, select the Configure(![The Firebolt Configure Space icon](../../assets/images/configure-icon.png)) icon from the left navigation pane. +3. Under **Configure**, select **Billing**. This page allows you to view invoices and consumption details. +4. Select **Connect to AWS Marketplace**. +5. On AWS Marketplace, click **View Purchase Options** > **Setup Your Account**. + +Firebolt will bill you monthly through **AWS Marketplace** based on usage. + +#### [](#sign-up-or-change-your-fully-managed-edition)Sign up or change your fully-managed edition + +If you want to sign up or upgrade your fully-managed edition type, you can choose the **Standard**, **Enterprise** or **Dedicated** plan. Select your new choice in the **Firebolt Workspace** as follows: + +1. Log in to [Firebolt’s Workspace](https://go.firebolt.io/login). +2. In the Firebolt Workspace, select the Configure(![The Firebolt Configure Space icon](../../assets/images/configure-icon.png)) icon from the left navigation pane. +3. Under **Configure**, expand the drop-down list next to **Billing**. +4. Under **Billing**, select **Plan** to open the list of available plans. Your active plan is labeled as **Current Plan**. For information about each plan, select **Learn more** to be directed to Firebolt’s [Pricing](https://www.firebolt.io/pricing) page. +5. Select your desired plan. + + 1. To select the **Standard** or **Enterprise** plan, choose **Select plan** and confirm your selection. + 2. To select the **Dedicated** plan, do the following: + + 1. 
Choose **Talk to Sales**. + 2. In the pop-up window, **Your email** is automatically populated with the email associated with your login. + 3. Enter a **Subject** or accept the default **Pricing plan** entry. + 4. Enter a **Description**. + 5. Select **Send** to notify Firebolt’s support team. + +Changing your plan is **not immediate** and may take **up to 24 hours** to process. You will receive updates about the status of your request through email. + +### [](#self-managed-editions-and-pricing)Self-managed editions and pricing + +Firebolt offers two self-managed options, where you run Firebolt on your own infrastructure: **Firebolt Core** and **Private Cloud**. + +![Firebolt offers two self-managed editions. One that is free that you manage, and one for a private cloud.](../../assets/images/self-managed-editions.png) + +**Private Cloud (BYOC)** +The **Private Cloud** edition is a BYOC (bring your own cloud) offering for organizations that want Firebolt’s software but prefer to use their own cloud infrastructure. Customers manage their own infrastructure for both compute and storage, whereas Firebolt manages hosting, Firebolt upgrades and maintenance. For BYOC pricing, contact [support@firebolt.io](mailto:support@firebolt.io). + +**Firebolt Core** +The **Firebolt Core** edition is a free downloadable version that can be deployed on cloud, on-premises, or on a local machine. This option is best for teams needing full control over deployment with a lightweight Firebolt engine. Customers manage compute and storage infrastructure, hosting, all software upgrades, and maintenance. + +## [](#support-plans-and-service-level-agreements)Support plans and service level agreements + +Firebolt offers support options based on your selected edition for fully-managed and **Private Cloud** editions. 
+ +Response Time Commitments (TFR = Time to First Response) + +Severity Level Issue Type Business support: TFR for Standard Edition Premium support: TFR for Enterprise, Dedicated or Private Cloud Critical (Sev1) Service outage or major disruption Response within 4 hours Response within 30 minutes High (Sev2) Significant performance degradation Response within 8 business hours Response within 2 hours Medium (Sev3) Minor impact or feature issue Response within 24 business hours Response within 6 business hours Low (Sev4) General inquiries or documentation questions Response within 48 business hours Response within 24 business hours + +**Premium support features** + +**Enterprise**, **Private Cloud**, and **Dedicated** edition customers receive the following additional support benefits beyond response time commitments: + +- **Proactive monitoring**: Alerts for issues and potential optimizations available to **Enterprise** edition customers. +- **Enhanced support channels**: Support through Slack, email and the [help menu](/Reference/help-menu.html) in the **Firebolt Workspace**. +- **Dedicated support engineer**: Customers are assigned a designated support engineer with deep knowledge of their environment, providing personalized support. This premium support level differs from **Standard** customers, who receive assistance from the general support pool. + +Contact [support@firebolt.io](mailto:support@firebolt.io) to learn more about **Enterprise** edition Support offerings. 
+ +* * * + +- [Explore compute cost](/Overview/billing/compute-cost.html) +- [Explore storage cost](/Overview/billing/storage-cost.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_billing_compute_cost.md b/cmd/docs-scrapper/fireboltdocs/overview_billing_compute_cost.md new file mode 100644 index 0000000..aeef5a3 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_billing_compute_cost.md @@ -0,0 +1,144 @@ +# [](#explore-compute-cost)Explore compute cost + +You can use data from the [`information_schema.engines_billing`](/sql_reference/information-schema/engines-billing.html) and [`information_schema.engine_history`](/sql_reference/information-schema/engine-history.html) views, to analyze and optimize compute costs related to engine usage, scaling, and auto-stop events. The following queries help track compute costs, identify cost patterns, and understand the impact of different configurations on your compute expenses. + +## [](#examples)Examples + +**Cost tracking and analysis** + +- [Track hourly average costs over time](#track-hourly-average-costs-over-time) – Learn how to track hourly patterns in compute costs. +- [Find top users by cost](#find-top-users-by-cost) – Learn how to identify users that are responsible for the highest compute costs. +- [Find the cost impact of auto-stop events](#find-the-cost-impact-of-auto-stop-events) – Learn how to query engine history before and after an auto-stop event to determine if the auto-stop functionality leads to cost savings. +- [Calculate costs incurred after engine deletion](#calculate-costs-incurred-after-engine-deletion) – Learn how to track costs that continue to accumulate even an engine has stopped running. +- [Calculate costs incurred after engine creation or scaling](#calculate-costs-incurred-after-engine-creation-or-scaling) – Learn how to calculate the cost of provisioning or resizing engines to optimize resource allocation. 
+ +**Engine configuration and optimization** + +- [Rank engine configurations by total cost](#rank-engine-configurations-by-total-cost) – Learn how to analyze engine billing data understand which combinations of engine type, family, and node count drive the most cost. +- [Rank efficiency by cluster count](#rank-efficiency-by-cluster-count) – Learn how to determine if single or multi-cluster setups are more efficient. +- [Calculate cost savings through auto-stop](#calculate-cost-savings-through-auto-stop) – Learn how to compare the average cost of engines with auto-stop enabled versus disabled. + +### [](#track-hourly-average-costs-over-time)Track hourly average costs over time + +You can use `information_schema.engines_billing` to track hourly average costs over time and identify peak load hours, allowing you to pinpoint when engine usage and costs are highest. Use this data to enable more efficient resource allocation and better cost optimization. Analyze daily trends to identify peak load hours and better understand the times when engine usage and associated costs are the highest. + +The following code example calculates the average billed cost per hour over the past 7 days, grouping the results by hour of the day: + +``` +SELECT EXTRACT(HOUR FROM usage_date::TIMESTAMP) AS usage_hour, + AVG(billed_cost) AS avg_hourly_cost +FROM information_schema.engines_billing +WHERE usage_date >= CURRENT_DATE - INTERVAL '7 days' +GROUP BY EXTRACT(HOUR FROM usage_date::TIMESTAMP) +ORDER BY usage_hour; +``` + +### [](#find-top-users-by-cost)Find top users by cost + +You can use `information_schema.engines_billing` and `information_schema.engine_history` to identify the top engine owners by analyzing engine billing data. Pinpoint the individuals or teams responsible for the highest engine costs over a specific period, fostering accountability and enabling more effective resource management. 
+ +The following code example calculates the total billed cost for each engine owner between the specified dates, sorting the results in descending order to show the highest spenders first: + +``` +SELECT engine_history_table.engine_owner, SUM(engine_billing_table.billed_cost) AS total_cost +FROM information_schema.engines_billing engine_billing_table +JOIN information_schema.engine_history engine_history_table USING (engine_name) +WHERE engine_billing_table.usage_date BETWEEN DATE '2025-02-24' AND DATE '2025-03-26' +GROUP BY engine_history_table.engine_owner +ORDER BY total_cost DESC; +``` + +### [](#find-the-cost-impact-of-auto-stop-events)Find the cost impact of auto-stop events + +When you [create an engine](/sql_reference/commands/engines/create-engine.html), you can set the `AUTO_STOP` parameter to automatically stop an engine after it has been idle for a specified number of minutes. Query the engine history before and after an auto-stop event to see if auto-stop functionality results in cost savings. 
+ +The following code example calculates the billed cost for engines both before and after an auto-stop event, showing how the cost changes once the engine is stopped automatically: + +``` +SELECT engine_billing_table.engine_name, + CAST(engine_history_table.event_start_time AS DATE) AS auto_stop_date, + SUM(CASE WHEN engine_billing_table.usage_date < CAST(engine_history_table.event_start_time AS DATE) THEN engine_billing_table.billed_cost ELSE 0 END) AS cost_before, + SUM(CASE WHEN engine_billing_table.usage_date >= CAST(engine_history_table.event_start_time AS DATE) THEN engine_billing_table.billed_cost ELSE 0 END) AS cost_after +FROM information_schema.engine_history engine_history_table +JOIN information_schema.engines_billing engine_billing_table ON engine_history_table.engine_name = engine_billing_table.engine_name +WHERE engine_history_table.event_type = 'AUTO_STOP' +GROUP BY engine_billing_table.engine_name, CAST(engine_history_table.event_start_time AS DATE); +``` + +### [](#calculate-costs-incurred-after-engine-deletion)Calculate costs incurred after engine deletion + +Use engine history and billing information to identify any billing discrepancies that occur after an engine has been deleted. By examining any charges that incurred after an engine was deleted, you can detect any unresolved billing issues or configuration errors. 
+ +The following code calculates the total billed cost incurred after an engine deletion event, identifying any charges that are incurred after the engine has been deleted, and grouping the results by engine name: + +``` +SELECT engine_billing_table.engine_name, MAX(engine_history_table.event_finish_time) AS deletion_time, + SUM(engine_billing_table.billed_cost) AS post_deletion_cost +FROM information_schema.engine_history engine_history_table +JOIN information_schema.engines_billing engine_billing_table ON engine_history_table.engine_name = engine_billing_table.engine_name +WHERE engine_history_table.event_type = 'ENGINE_DELETE' + AND engine_billing_table.usage_date > CAST(engine_history_table.event_finish_time AS DATE) +GROUP BY engine_billing_table.engine_name +HAVING SUM(engine_billing_table.billed_cost) > 0; +``` + +### [](#calculate-costs-incurred-after-engine-creation-or-scaling)Calculate costs incurred after engine creation or scaling + +Analyze engine billing data to track the costs incurred after engine creation or scaling events to understand the immediate financial impact of provisioning or resizing engines and optimize resource allocation. Rank them to find top combinations of engine size and architecture by cost. 
+ +The following code calculates the total billed cost for each engine after an `ENGINE_CREATE` or `SCALE_UP` event, grouping the results by engine name and event type, and sorting the output by the total cost in descending order: + +``` +SELECT engine_billing_table.engine_name, engine_history_table.event_type, CAST(engine_history_table.event_start_time AS DATE) AS event_date, + SUM(engine_billing_table.billed_cost) AS total_cost_after_event +FROM information_schema.engine_history engine_history_table +JOIN information_schema.engines_billing engine_billing_table ON engine_history_table.engine_name = engine_billing_table.engine_name +WHERE engine_history_table.event_type IN ('ENGINE_CREATE', 'SCALE_UP') + AND engine_billing_table.usage_date > CAST(engine_history_table.event_start_time AS DATE) +GROUP BY engine_billing_table.engine_name, engine_history_table.event_type, CAST(engine_history_table.event_start_time AS DATE) +ORDER BY total_cost_after_event DESC; +``` + +### [](#rank-engine-configurations-by-total-cost)Rank engine configurations by total cost + +Analyze engine billing data to rank engine configurations by total cost, identifying those driving the highest expenses. Optimize resource allocation and more cost-effective decisions for your engine setup. 
+ +The following code calculates the total billed cost for different engine configurations that ran of type, family, and node count, over a specified date range, and sorts the results by total cost in descending order: + +``` +SELECT engine_history_table.type, engine_history_table.family, engine_history_table.nodes, SUM(engine_billing_table.billed_cost) AS total_cost +FROM information_schema.engines_billing engine_billing_table +JOIN information_schema.engine_history engine_history_table USING (engine_name) +WHERE engine_billing_table.usage_date BETWEEN DATE '2025-02-24' AND DATE '2025-03-26' +GROUP BY engine_history_table.type, engine_history_table.family, engine_history_table.nodes +ORDER BY total_cost DESC; +``` + +### [](#rank-efficiency-by-cluster-count)Rank efficiency by cluster count + +Ranking efficiency by cluster count helps assess the cost-effectiveness of single versus multi-cluster setups. Analyzing engine billing data enables you to track associated costs and optimize resource allocation based on cost efficiency. 
+ +The following code example calculates the cost per [FBU](/Overview/engine-consumption.html#engine-consumption) for single or multi-cluster setups by dividing the total billed cost by the total consumed FBUs for each cluster configuration, and then sorting the results by cost per FBU in descending order: + +``` +SELECT engine_history_table.clusters, SUM(engine_billing_table.billed_cost) / NULLIF(SUM(engine_billing_table.consumed_fbu), 0) AS cost_per_fbu +FROM information_schema.engines_billing engine_billing_table +JOIN information_schema.engine_history engine_history_table USING (engine_name) +WHERE engine_billing_table.usage_date BETWEEN DATE '2025-02-24' AND DATE '2025-03-26' +GROUP BY engine_history_table.clusters +ORDER BY cost_per_fbu DESC; +``` + +### [](#calculate-cost-savings-through-auto-stop)Calculate cost savings through auto-stop + +Calculating cost savings through auto-stop allows you to compare the daily costs of engines with auto-stop enabled and disabled, helping you understand its impact on costs and optimize resource allocation. 
+ +The following code example calculates the average daily billed cost for engines with auto-stop enabled and disabled, grouping the results by the auto-stop setting to provide insights into the cost impact of having auto-stop turned on or off: + +``` +SELECT engine_history_table.auto_stop, AVG(engine_billing_table.billed_cost) AS avg_cost +FROM information_schema.engines_billing engine_billing_table +JOIN information_schema.engine_history engine_history_table USING (engine_name) +WHERE engine_billing_table.usage_date BETWEEN DATE '2025-02-24' AND DATE '2025-03-26' +GROUP BY engine_history_table.auto_stop; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_billing_storage_cost.md b/cmd/docs-scrapper/fireboltdocs/overview_billing_storage_cost.md new file mode 100644 index 0000000..1d0dfca --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_billing_storage_cost.md @@ -0,0 +1,164 @@ +# [](#explore-storage-cost)Explore storage cost + +You can use data from the [`information_schema.storage_billing`](/sql_reference/information-schema/storage-billing.html) and [`information_schema.storage_history`](/sql_reference/information-schema/storage-history.html) views to analyze and optimize storage costs across different regions, accounts, and data usage trends. The following queries will guide you in tracking storage expenses, understanding regional cost variations, and identifying storage inefficiencies. + +## [](#examples)Examples + +**Track and attribute storage costs** + +- [Track total storage cost over time](#track-total-storage-cost-over-time) – Learn how to monitor daily and monthly storage costs. +- [Identify storage cost by region](#identify-storage-cost-by-region) – Learn how to determine which regions contribute most to storage expenses. +- [Attribute storage costs to accounts](#attribute-storage-costs-to-accounts) – Learn how to attribute storage costs to specific customers or business units. 
+- [Track storage credits usage](#track-storage-credits-usage) – Learn how to monitor what percentage of your storage cost is paid using Firebolt credits. + +**Monitor data efficiency** + +- [Monitor active vs inactive data trends](#monitor-active-vs-inactive-data-trends) – Learn how to track the proportion of active and inactive data in your storage. +- [Identify databases with high inactive storage](#identify-databases-with-high-inactive-storage) – Learn how to find databases with significant amounts of inactive data. +- [Track storage growth by catalog](#track-storage-growth-by-catalog) – Learn how to monitor storage growth trends across catalogs, which include tables, views, and schemas. + +**Evaluate cost efficiency and inactive data** + +- [Evaluate cost efficiency per unit of storage](#evaluate-cost-efficiency-per-unit-of-storage) – Learn how to calculate the cost per unit storage to assess the efficiency of your storage expenses relative to the amount of data stored. +- [Assess inactive data as a percentage of total storage](#assess-inactive-data-as-a-percentage-of-total-storage) – Learn how to understand what percentage of your total storage is used by data that is inactive. +- [Attribute storage billing to catalogs](#attribute-storage-billing-to-catalogs) – Learn how to attribute storage billing to specific tables, views, and schemas. + +### [](#track-total-storage-cost-over-time)Track total storage cost over time + +You can track total storage cost over time to monitor spending trends, identify potential cost increases, and ensure that storage usage aligns with budget expectations. Query `information_schema.storage_billing` to help you proactively manage costs, so you can make adjustments to optimize storage efficiency and prevent unexpected charges. 
+ +The following code example calculates the daily storage cost and the total monthly storage cost by summing the billed costs for each day and using a window function to compute the monthly total for each day within the same month: + +``` +SELECT usage_date, + SUM(billed_cost) AS total_cost, + DATE_TRUNC('month', usage_date) AS month, + SUM(SUM(billed_cost)) OVER (PARTITION BY DATE_TRUNC('month', usage_date)) AS monthly_cost +FROM information_schema.storage_billing +GROUP BY usage_date +ORDER BY usage_date; +``` + +### [](#identify-storage-cost-by-region)Identify storage cost by region + +Use [`information_schema.storage_billing`](/sql_reference/information-schema/storage-billing.html) inside a query to understand which regions incur the highest storage costs, helping you optimize resource allocation based on geographical pricing differences. + +The following code example calculates the total storage cost for each region and sorts the results in descending order of total cost: + +``` +SELECT region, SUM(billed_cost) AS total_cost +FROM information_schema.storage_billing +GROUP BY region +ORDER BY total_cost DESC; +``` + +### [](#attribute-storage-costs-to-accounts)Attribute storage costs to accounts + +You can use `information_schema.storage_billing` to attribute storage costs to specific accounts or business units and gain visibility into where your storage expenses originate. This can help you track usage, optimize costs, and allocate resources more effectively. 
+ +The following code example calculates the total storage cost for each account and sorts the results in descending order of total cost: + +``` +SELECT account_name, SUM(billed_cost) AS total_cost +FROM information_schema.storage_billing +GROUP BY account_name +ORDER BY total_cost DESC; +``` + +### [](#track-storage-credits-usage)Track storage credits usage + +Analyzing how much of your storage costs is paid with Firebolt credits versus billed costs helps optimize expenses, track credit usage, identify cost-saving opportunities, and improve budget management. Use `information_schema.storage_billing` to analyze how much of your storage cost is paid using Firebolt credits and how much is billed. + +The following code example uses the `is_credit` field to calculate the total storage cost, grouped by whether the cost was paid through credits or billed: + +``` +SELECT is_credit, SUM(billed_cost) AS total_cost +FROM information_schema.storage_billing +GROUP BY is_credit; +``` + +### [](#monitor-active-vs-inactive-data-trends)Monitor active vs inactive data trends + +Inactive data refers to data that is no longer actively used or is infrequently accessed but still consumes storage resources. Retaining large volumes of inactive data can result in higher costs and wasted storage. Use [`information_schema.storage_history`](/sql_reference/information-schema/storage-history.html) to identify this data so that you can make decisions about optimizing storage to reduce unnecessary expenses. 
+ +The following code example shows daily trends for active and inactive data in Gibibyte (GiB), which is equal to 1,073,741,824 bytes: + +``` +SELECT usage_date, + SUM(active_data_size_bytes)/1024/1024/1024 AS active_gib, + SUM(inactive_data_size_bytes)/1024/1024/1024 AS inactive_gib +FROM information_schema.storage_history +GROUP BY usage_date +ORDER BY usage_date; +``` + +### [](#identify-databases-with-high-inactive-storage)Identify databases with high inactive storage + +You can identify databases that contain large amounts of inactive data, which may be a candidate for cleanup or archiving. + +The following code example calculates the total inactive data size for each database: + +``` +SELECT catalog_name, + SUM(inactive_data_size_bytes)/1024/1024/1024 AS inactive_gib +FROM information_schema.storage_history +GROUP BY catalog_name +ORDER BY inactive_gib DESC; +``` + +### [](#track-storage-growth-by-catalog)Track storage growth by catalog + +You can use `information_schema.storage_history` to track storage growth by catalog, or objects that hold metadata and database objects such as tables, views, databases and schemas, to monitor how storage usage is changing across different databases or projects. Identify trends, optimize storage allocation, and ensure that resources are being used efficiently, while also providing insight into which catalogs may require further optimization or cost management. 
+ +The following code example calculates the total storage size in GiB for each catalog by summing active and inactive data sizes and ordering the results by catalog and usage date: + +``` +SELECT catalog_name, usage_date, + (active_data_size_bytes + inactive_data_size_bytes)/1024/1024/1024 AS total_gib +FROM information_schema.storage_history +ORDER BY catalog_name, usage_date; +``` + +### [](#evaluate-cost-efficiency-per-unit-of-storage)Evaluate cost efficiency per unit of storage + +You can use `information_schema.storage_billing` to evaluate cost efficiency per unit of storage by assessing whether storage costs are proportional to the amount of data being stored. This can help you identify discrepancies, optimize spending, and ensure that storage resources are being utilized efficiently. Determine if your storage costs align with the volume of data stored by calculating the cost per GiB. + +The following code example calculates the cost per GiB of storage for each day by dividing the total billed cost by the total consumed GiB, and orders the results by usage date: + +``` +SELECT usage_date, + SUM(billed_cost) / NULLIF(SUM(consumed_gib_per_month), 0) AS cost_per_gib +FROM information_schema.storage_billing +GROUP BY usage_date +ORDER BY usage_date; +``` + +### [](#assess-inactive-data-as-a-percentage-of-total-storage)Assess inactive data as a percentage of total storage + +You can use `information_schema.storage_history` to measure the amount of inactive data as a percentage of total storage to identify how much of storage is being underutilized. Assessing inactive data as a percentage of total storage helps you understand how much of your storage is underutilized, enabling you to optimize costs by cleaning up or archiving inactive data. 
+ +The following code example calculates the percentage of inactive data relative to the total storage, composed of both active and inactive storage, for each day and orders the results by usage date: + +``` +SELECT usage_date, + 100 * SUM(inactive_data_size_bytes)::FLOAT / + NULLIF(SUM(active_data_size_bytes + inactive_data_size_bytes), 0) AS inactive_pct +FROM information_schema.storage_history +GROUP BY usage_date +ORDER BY usage_date; +``` + +### [](#attribute-storage-billing-to-catalogs)Attribute storage billing to catalogs + +You can use `information_schema.storage_billing` to attribute storage billing to catalogs, or objects that hold metadata and database objects such as tables, databases, views, and schemas. Use this data to better understand and allocate storage costs to specific departments, projects, or business units, so that you can enable more accurate cost tracking, budgeting, and optimization across different catalogs. + +The following code example calculates the total storage billing cost for each catalog by joining storage billing data with catalog account mappings and storage history, and then sorts the results by total cost in descending order: + +``` +SELECT storage_history.catalog_name, SUM(storage_billing.billed_cost) AS total_cost +FROM information_schema.storage_billing storage_billing +JOIN some_catalog_account_mapping m ON storage_billing.account_name = m.account_name +JOIN information_schema.storage_history storage_history ON m.catalog_name = storage_history.catalog_name AND storage_billing.usage_date = storage_history.usage_date +GROUP BY storage_history.catalog_name +ORDER BY total_cost DESC; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_data_management.md b/cmd/docs-scrapper/fireboltdocs/overview_data_management.md new file mode 100644 index 0000000..e96d17b --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_data_management.md @@ -0,0 +1,161 @@ +# [](#data-management)Data management + +At Firebolt, 
the focus is on delivering the highest levels of performance and efficiency while minimizing the heavy lift that needs to be done by an end user. The foundational elements of Firebolt provide the full data management capabilities needed by modern data applications, including an optimized storage layer, parallel-pipelined data ingestion, ACID DML, primary and aggregated indexes, streamlined deletes and updates, and core database semantics. + +Delivering consistent performance and efficiency requires careful considerations, modeling and optimal indexing strategy. With automated index maintenance, Firebolt enables a hands-free approach to delivering consistent performance without the operational burden, even for cases where data modification functionality is needed. Various types of DML statements can impact system performance as it can result in storage fragmentation, negating the effectiveness of indexes. However, Firebolt addresses this with the choice of user-controlled or system-controlled backend optimization. + +This topic explains how Firebolt delivers full data management capabilities, including how to: 1) create a simple table, 2) ingest data into it using bulk insert and singleton insert capabilities, and finally, 3) modify the data by performing delete and update operations. + +Learn also what happens behind the scenes, how data is being stored internally and how internal data structures evolve as data gets manipulated and changed, as well as how Firebolt handles concurrency, managing indexes and optimizing storage. 
+ +- [Transactions and concurrency](#transactions-and-concurrency) +- [Creating tables](#creating-tables) +- [Inserting data](#inserting-data) +- [Deleting data](#deleting-data) +- [Updating data](#updating-data) +- [Optimizing storage](#optimizing-storage) +- [Maintaining indexes](#maintaining-indexes) + +## [](#transactions-and-concurrency)Transactions and concurrency + +Before getting into how data manipulation and changes work in Firebolt, it is important to first explain transactions and concurrency controls that are implemented within the system. + +Firebolt treats each request or SQL statement as a new implicit transaction. Transactions guarantee the ACID properties: atomicity (all or nothing), consistency (will not transition to an invalid state), isolation (no interference between concurrent transactions, depending on isolation level, see below), durability (no data loss once committed). + +Firebolt supports two isolation levels: + +- DDL (CREATE, ALTER, DROP) and DCL (GRANT, REVOKE) work at *serializable isolation* level. +- DQL (SELECT) and DML (INSERT, UPDATE, DELETE) work at *snapshot isolation* level. + +Serializable isolation means that the result of running concurrent statements is equivalent to as if they were run serially in some order. + +Snapshot isolation is weaker and guarantees that a transaction sees a consistent version of the database. Transactions fail if any written rows are being written concurrently by another transaction. It is a weaker isolation than serializable, because a transaction does not fail if read data is being changed concurrently. This makes it subject to so-called write skew anomalies. As a user, you need to make sure that the write skew anomaly is harmless for your application or does not occur - for example by avoiding concurrent read-write operations. In exchange, Firebolt can deliver the performance, scale and throughput that data intensive applications need. 
+ +## [](#creating-tables)Creating tables + +Let’s look at an example of how to [create a simple table](/sql_reference/commands/data-definition/create-fact-dimension-table.html) called rankings. + +``` +CREATE TABLE IF NOT EXISTS rankings ( + GameID INTEGER, + PlayerID INTEGER, + MaxLevel INTEGER, + TotalScore BIGINT, + PlaceWon INTEGER, + TournamentID INTEGER, + SOURCE_FILE_NAME TEXT, + SOURCE_FILE_TIMESTAMP TIMESTAMP) +PRIMARY INDEX GameID, TournamentID, PlayerID; +``` + +The statement also contains a [primary index](/Overview/indexes/primary-index.html) definition. While primary index creation is optional in Firebolt, defining primary indexes on all tables is encouraged. Primary indexes, and indexes in general, are critical for delivering performance and achieving efficient query execution. + +How do indexes help with delivering this performance? Each table in Firebolt is composed of internal objects called tablets. Tablets store data in columnar format with built-in data compression, reducing disk io and resulting in efficient, performant query processing. Input data gets automatically converted into this columnar data format as the data gets ingested into the table. The size of individual tablets and the number of tablets per table is automatically determined and managed by the Firebolt system. When a table is defined as a partitioned table, individual tablets are bound to a particular partition, as tablets cannot span multiple partitions. + +![Tablets](/assets/images/tablets.png) + +Defining a primary index on a table forces data sorting within each tablet. Primary indexes can be defined using one or multiple columns, and data ordering follows that index definition. Implementing proper primary indexes and having data ordered correctly is critical for many reasons. 
First, since each tablet contains granular metadata information about data stored within the tablet itself, scalar predicates and semi-join queries that use primary index columns can be optimized by scanning only the data that is required (as a data pruning process). Second, only tablets needed to satisfy the query can be touched, even though a table may contain many tablets behind the scenes – unneeded tablets are skipped during querying time (as a tablet pruning process). Finally, the ordered dataset typically compresses to a smaller size, leading to reduced IO operations. + +In addition, in situations where users create partitioned tables, queries containing filters on partitioned columns can eliminate unnecessary data scans by querying only partitions needed for results (as a partition pruning process). All techniques mentioned above lead to efficiency when querying and processing data. + +## [](#inserting-data)Inserting data + +To support loading data from a data lake (Amazon S3) into Firebolt tables, Firebolt provides utilities for loading data in bulk. Users can perform both initial and incremental data ingestion using either a [COPY](/sql_reference/commands/data-management/copy-from.html) statement or [INSERT INTO SELECT FROM <external\_table>](/sql_reference/commands/data-management/insert.html) syntax. + +In addition, Firebolt supports singleton inserts via a `INSERT INTO VALUES` statement. Typical scenarios where singleton inserts come handy are: + +- Refreshing tables with small amounts of dimensional data, and +- Implementing slowly changing dimension patterns. + +Let’s review how these utilities work in more detail, and their corresponding performance. + +### [](#bulk-inserts)**Bulk inserts** + +In Firebolt, bulk data ingestion utilities are designed to be inherently scalable. 
There are 4 major stages that each loading process goes through: + +1\) Reading data from Amazon S3 + +2\) Converting data into tablets and columnstore + +3\) Merging tablets to optimize tablet quality (needed for efficient tablet pruning) + +4\) Uploading tablets into Amazon S3. + +As a distributed system and query engine, Firebolt supports both parallel and pipelined data processing, to scale loading processes. At the node level, Firebolt fully employs intra-node parallelism by having multiple reader/writer processes. Data loading parallelism automatically increases as the engine increases in scale (multi-node parallelism). Pipelined execution ensures that all deployed resources are kept busy while data is being streamed across transformation stages mentioned above. This way, users can ingest large amounts of data into Firebolt tables while utilizing deployed resources as efficiently as possible. + +![Ingest](/assets/images/ingest.png) + +As the bulk data load gets executed, Firebolt creates new tablets behind the scenes while preserving necessary metadata information about the data being ingested. Factors such as min/max values, tablet size, data offsets and ranges, tablet location, etc., are kept as metadata artifacts and used during query execution to deliver differentiated performance characteristics. + +Bulk insert statement example + +``` +COPY INTO rankings FROM 's3://Bucket/directory/'; +//The statement above copies all the files from a directory in an S3 bucket into the rankings table. +``` + +### [](#singleton-insert)**Singleton insert** + +While singleton insert is a supported pattern in Firebolt, using bulk insert utilities is recommended to optimize performance of large data ingestion. + +Data inserted by a singleton insert statement gets stored within a single tablet. From the tablet quality perspective, this creates a suboptimal situation, and can lead to table fragmentation in cases where many singleton inserts are executed. 
This in turn can lead to suboptimal query performance. One way to mitigate this situation is to use a mini batch pattern (a collection of `INSERT INTO VALUES` statements separated by a comma) whenever possible. When a batch of `INSERT INTO VALUES` statements are executed, Firebolt tries to create a single tablet, which will improve tablet quality and reduce table fragmentation. + +To minimize operational overhead and system maintenance that table fragmentation can cause, Firebolt implements a built-in optimization process that merges tablets with suboptimal size. This optimization process is fully autonomous and runs in the background. The background process searches on a periodic basis for suboptimal tablets and merges them while keeping tablet optimal size in mind. In addition, Firebolt supports the [VACUUM](/sql_reference/commands/data-management/vacuum.html) command that allows users full control to defragment tables on-demand. + +Singleton insert statement example + +``` +INSERT INTO rankings (GameID, PlayerID, MaxLevel, TotalScore, PlaceWon, TournamentID) VALUES (10, 231, 10, 1000020, 1, 5); +``` + +## [](#deleting-data)Deleting data + +Firebolt supports storing as much data as needed for as long as needed. However, there are situations where data does need to be deleted. Situations like data corrections that occur in the systems of records, or GDPR compliance where a single (or multiple) customer record(s) must be deleted to preserve privacy, have led to support for [DELETE](/sql_reference/commands/data-management/delete.html) statements in Firebolt. + +Firebolt supports deleting a single record in a table or deleting data in bulk. In addition to the tablet level metadata, each tablet comes with a delete log. As the records get deleted, the delete log is maintained. Given rich tablet-level metadata, only tablets that are affected by the delete operation are touched. 
In cases where `DELETE` query uses a primary key(s) as a predicate, Firebolt leverages primary key information to quickly find tablets and records that are affected by the DELETE operation, leading to performant deletes. Similarly, deleting all data that belongs to one or more partitions is almost instantaneous, as deleting data in these cases is only a metadata operation in Firebolt. + +Having frequent deletes in the system could lead to tablet fragmentation, so there’s a background process that continuously and automatically optimizes tablets behind the scenes. For more information, refer to [optimizing storage](#optimizing-storage) below. + +For optimal performance, leverage primary key(s) for deleting the data whenever possible. + +Bulk delete statement example + +``` +DELETE FROM rankings WHERE PlayerID = 231; +``` + +Singleton delete statement example + +``` +DELETE FROM rankings WHERE GameID = 10 AND TournamentID = 5 AND PlayerID = 231; +``` + +## [](#updating-data)Updating data + +In real life, data updates happen often. Individuals regularly update their contact information as their residency changes, prospects change their contact preference for marketing campaigns, an order changes its status as it goes through the fulfillment process, all causing data stored in the data warehouses to be updated. + +An [UPDATE](/sql_reference/commands/data-management/update.html) can be represented as a composite operation of `DELETE` followed by an `INSERT` operation. This holds true for both singleton as well as bulk update statements. Firebolt supports both simple as well as complex update functionality, including complex predicates and multi-table joins. Similarly, any column defined in a Firebolt table can be updated, including partitioning columns. 
+ While updating values for a partitioning column may lead to a longer execution time (depending on the number of records that exist in the partition, as updated data may need to be moved to newly assigned partitions), this functionality simplifies usage and avoids the need for manual workarounds to be done by our users. + +Update statement example + +``` +UPDATE rankings SET GameID = 11 WHERE GameID = 10 AND TournamentID = 5 AND PlayerID = 231; +``` + +## [](#optimizing-storage)Optimizing storage + +As mentioned in earlier sections, certain data modification scenarios (deletes/updates) could lead to situations where tables get fragmented. To help spot fragmentation situations and identify tables that are fragmented, Firebolt provides a view called [information\_schema.tables](/sql_reference/information-schema/tables.html). + +The `information_schema.tables` view provides a number of useful columns, such as the number of rows, compressed and uncompressed table size, among others. For our purposes here, two columns are of interest as fragmentation metrics: 1) `number_of_tablets` and 2) `fragmentation`. As the name suggests, the `number_of_tablets` column projects information about the total number of tablets in each table. The fragmentation column specifies the table fragmentation percentage (number between 0-100). As a best practice and general guidance, it’s recommended to keep fragmentation below 80. + +Tables that are fragmented have been identified, now what? There are two paths that Firebolt provides to tackle fragmentation state: + +1\) the user-driven option of using a `VACUUM` statement, and 2) system-driven clean up that runs in the background. + +Both options lead to optimizing fragmented tablets and merging them into tablets with ideal sizes. `VACUUM` functionality leverages the user-defined engine to perform this optimization task, while system-driven behavior runs completely autonomously and uses Firebolt-provided capacity. 
The column `number_of_tablets` can be used to identify the total number of tablets before and after optimization tasks are performed. Similarly, the fragmentation metrics from `information_schema.tables` can be used to monitor progress and assess effectiveness of the background process to keep the table in a healthy state. + +The background process is recommended due to ease of the management and administration, but both options are available to users to fit their desired behavior and needs. + +## [](#maintaining-indexes)Maintaining indexes + +Aggregating indexes are one of the key features in Firebolt that help accelerate query performance. Users can create these indexes while using one or more Firebolt supported [aggregate functions](/sql_reference/functions-reference/aggregation/), such as `COUNT, SUM, AVG, MIN, MAX, COUNT(DISTINCT)`, etc. One of the key promises that databases have is that indexes are automatically maintained and updated as the data in the base table change – this is no different with Firebolt. However, data modifications (and specifically deletes) are not the friendliest operations when it comes to maintaining aggregated indexes. Aggregated functions such as `COUNT, SUM, AVG` are composable aggregates, and as such can be easily updated/modified as data changes. However, aggregates such as `MIN, MAX, COUNT(DISTINCT)` do not fall into that category. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_engine_consumption.md b/cmd/docs-scrapper/fireboltdocs/overview_engine_consumption.md new file mode 100644 index 0000000..9da7995 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_engine_consumption.md @@ -0,0 +1,86 @@ +# [](#engine-consumption)Engine Consumption + +This page explains the engine consumption metric, Firebolt Units (FBUs), and provides detailed examples showing how to calculate the following: + +- The FBUs available to an engine. +- The actual FBUs consumed by a running engine. 
+ +For more information about Firebolt engines, see [Understanding Engine Fundamentals](/Overview/engine-fundamentals.html). + +Firebolt uses a metric called Firebolt Units (FBU) to track engine consumption. The number of FBUs available to an engine is determined by the following engine attributes: + +- The node type. +- The number of nodes. +- The number of clusters. + +Each node type provides a minimum number of FBUs as shown in the following table: + +Node type Compute family Firebolt Units (FBUs) per hour Small (S) Storage-optimized 8 Medium (M) Storage-optimized 16 Large (L) Storage-optimized 32 Extra Large (XL) Storage-optimized 64 Small (S) Compute-optimized 4 Medium (M) Compute-optimized 8 Large (L) Compute-optimized 16 Extra Large (XL) Compute-optimized 32 + +Small and medium engines are available for use right away. If you want to use a large or extra-large engine, reach out to support@firebolt.io. + +As you add more nodes and more clusters to your engine, the number of FBUs available to the engine increases linearly. The available FBUs for a given engine configuration are calculated as follows: + +## [](#fbu-per-hour-for-a-given-engine--fbu-of-node-type-x-nodes-x-clusters)FBU-per-hour for a given Engine = (FBU of node Type x Nodes x Clusters) + +### [](#calculating-fbus-per-hour-for-a-given-engine---example-1)Calculating FBUs-per-hour for a given Engine - Example 1: + +- If you create an engine with the following configuration: TYPE = “S”, NODES = 2, CLUSTERS=1, it will have 16 FBUs available per hour (8 x 2 x 1). +- If you scale out the engine configuration to use 3 nodes instead of 2, the available FBUs per hour increases to 24 FBUs (8 x 3 x 1). + +### [](#calculating-fbus-per-hour-for-a-given-engine---example-2)Calculating FBUs-per-hour for a given Engine - Example 2: + +- If you create an engine with the following configuration: TYPE = “L”, NODES = 3, CLUSTERS=2, it will have 192 FBUs available per hour (32 x 3 x 2). 
+- If you scale down the engine to use a “M” type node, the available FBUs per hour will decrease to 96 FBUs (16 x 3 x 2). + +FBUs are consumed only when an engine is in a running state. The FBUs consumed by an engine depend on both the current configuration of the engine (node type, number of nodes and number of clusters) and the duration for which the engine has been running in that configuration. Since Firebolt provides per-second billing, the consumed FBU is calculated at per-second granularity, as shown in the following examples: + +## [](#fbus-consumed---fbus-per-hour--3600-x-engine-runtime-in-seconds)FBUs Consumed = (FBUs per hour / 3600) x (engine runtime in seconds) + +### [](#consumed-fbu---example-1)Consumed FBU - Example 1: + +If you create an engine with the following configuration: TYPE = “S”, NODES = 2, CLUSTERS=1, it has 16 available FBUs per hour. If the engine runs for only 15 minutes of that hour, then the engine consumes 4 FBUs. + +### [](#consumed-fbu---example-2)Consumed FBU - Example 2: + +If you create an engine with the following configuration: TYPE = “L”, NODES = 3, CLUSTERS=2, it has 192 FBUs available per hour. If the engine was only running for 40 seconds to quickly ingest a small amount of data, the consumed FBU is calculated as follows: + +FBU Consumed = (192/3600) x 40 = 2.13 + +## [](#the-impact-of-resize-operations-on-engine-consumption)The impact of resize operations on engine consumption + +If you perform scaling operations on an engine, you add additional compute resources (clusters) to the engine. These operations include scaling up or down, and scaling out or in. + +Any new clusters resulting from a resizing operation will have the new **node type** for a scale up or down operation. Any new clusters will have the desired **number of nodes** for a scale in or out operation. Existing clusters will be removed after they finish running any currently running queries. New queries will run on the new resized clusters. 
+ This can result in a temporary overlap when both old and new clusters are running concurrently, which will be reflected in the engine’s consumption. + +### [](#example-1)Example 1: + +If you create an engine with the following configuration: TYPE = “S”, NODES = 2, CLUSTERS=1, there are 16 available FBUs per hour. + +- **First 15 minutes during the hour:** The engine is running in the above configuration. +- **After first 15 minutes:** The engine is scaled out to use five nodes, keeping the same node type and same number of clusters. The old cluster with two nodes continues to run for 5 minutes to finish executing the queries. The new cluster with five nodes runs for the next fifteen minutes. + +The consumed FBUs are calculated as follows: + +- **Minutes 1-15:** Only one cluster with two nodes is running, consuming 4 FBUs. +- **Minutes 16-20:** Two clusters are running. The cluster with two nodes consumes 1.3 FBUs, and the cluster with five nodes consumes 3.3 FBUs. +- **Minutes 20-30:** Only the cluster with five nodes is running and consumes 6.7 FBUs. + +Total number of FBUs consumed for the hour = 4 + 1.3 + 3.3 + 6.7 = 15.3 FBUs. + +### [](#example-2)Example 2: + +If you create an engine with the following configuration: TYPE = “L”, NODES = 2, CLUSTERS=1, there are 64 available FBUs per hour. + +- **First 15 minutes during the hour:** The engine is running in the above configuration. + +- **After first 15 minutes:** The engine is scaled down to use “M” type nodes, keeping the same number of nodes and same number of clusters. The old cluster with “L” type nodes continues to run for 5 minutes to finish running the queries. The new cluster with “M” type nodes runs for the next fifteen minutes. + +The consumed FBUs are calculated as follows: + +- **Minutes 1-15:** Only one cluster with two “L” type nodes is running, consuming 16 FBUs. +- **Minutes 16-20:** Two clusters running. 
Cluster with two “L” type nodes consumes 5.3 FBU and Cluster with two “M” type nodes consumes 2.7 FBU. +- **Minutes 20-30:** Only the cluster with two “M” type nodes is running, consuming 5.3 FBUs. + +Total number of FBUs consumed for the hour = 16 + 5.3 + 2.7 + 5.3 = 29.3 FBUs. + +Firebolt Engines are priced based on the amount of FBUs consumed by a given engine. For more details on engine pricing, including examples, visit the [Pricing page](https://www.firebolt.io/pricing). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_engine_fundamentals.md b/cmd/docs-scrapper/fireboltdocs/overview_engine_fundamentals.md new file mode 100644 index 0000000..14b91f7 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_engine_fundamentals.md @@ -0,0 +1,164 @@ +# [](#firebolt-engines)Firebolt Engines + +Engines are compute resources that process data and serve queries in Firebolt. Use engines to load data into Firebolt and run queries on the ingested data. + +Firebolt engines provide **full workload isolation**, so that multiple workloads run independently while sharing the same data. Engines are **decoupled from databases**, meaning: + +- A single engine can run queries on multiple databases. +- A database can be queried using multiple engines. + +Start, stop and modify engines at any time using the SQL API. You can also **dynamically scale engines** based on workload needs without stopping them. + +This document explains engine configuration, scaling, monitoring, security and connection options. + +**Topics** + +- [Key engine concepts](#key-engine-concepts) – Learn about the `TYPE`, `NODES`, and `CLUSTERS` attributes that define a Firebolt engine’s configuration and scaling options. +- [Multi-dimensional elasticity](#multi-dimensional-elasticity) – Scale engines dynamically by adjusting engine attributes without stopping workloads. 
+- [Connecting to engines](#connecting-to-engines) – How to connect to a Firebolt engine using the UI, Engine URL, or third-party tools. +- [Monitoring engine usage](#monitoring-engine-usage) – Track engine performance using observability views in `information_schema` to optimize resource allocation. +- [Engine governance and security](#engine-governance-and-security) – Control engine access using [Role-Based Access Control (RBAC)](/Overview/Security/Role-Based%20Access%20Control/) and account-level isolation to enforce security policies. +- [Viewing and understanding engine status](#viewing-and-understanding-engine-status) – Learn how to use `SHOW ENGINES` to check the status of all engines, including running, resizing, and stopped states. + +## [](#key-engine-concepts)Key engine concepts + +Engines in Firebolt are defined by three attributes: **Type**, **Nodes**, and **Clusters**. These attributes determine the engine’s configuration and scaling options. + +**Family** +Compute nodes can also be storage-optimized with larger cache sizes or compute-optimized which have smaller caches. The default is storage optimized. + +**Nodes** +This attribute represents the number (1 - 128) of compute nodes, allowing granular horizontal scaling to fine-tune query performance characteristics while avoiding overprovisioning and unnecessary cost. Both scaling in and out are supported. + +**Clusters** +A cluster is a collection of compute resources, described by “Type” and “Nodes” attributes. A given Firebolt engine can contain one or more clusters. The maximum number of clusters is specified by the Clusters attribute. Only homogeneous cluster configurations (clusters with the same number of Nodes and Type) are supported within a single engine. Users can leverage the “Clusters” attribute to support query concurrency scaling. 
+ +![An engine cluster in Firebolt](/assets/images/engine_cluster_type_M.png) +**An engine cluster with four nodes of type ‘M’** + +![A Firebolt engine with two clusters, each cluster containing four nodes of type 'M'](/assets/images/Engine_two_clusters_type_M.png) +**A Firebolt engine with two clusters, each cluster containing four nodes of type ‘M’** + +The three attributes: `TYPE`, `NODES` and `CLUSTERS` – form the configuration of an engine. + +To create an engine, use the [CREATE ENGINE command](/sql_reference/commands/engines/create-engine.html), specifying the node type to be used for the engine, number of clusters and number of nodes per cluster. + +The following code example creates two clusters, each containing four nodes of type `M`: + +``` +CREATE ENGINE IF NOT EXISTS MyEngine +WITH TYPE = M +NODES = 4 +CLUSTERS = 2; +``` + +For a full list of engine attributes, see [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html) + +## [](#multi-dimensional-elasticity)Multi-dimensional elasticity + +Firebolt engines enable dynamic and fully online scaling operations, meaning you do not need to stop your engines to scale them. You can scale an engine along three dimensions: + +Scaling Type Action Example SQL Command **Vertical Scaling** Change the node type `ALTER ENGINE MyEngine SET TYPE = L;` **Horizontal Scaling** Change the number of nodes `ALTER ENGINE MyEngine SET NODES = 3;` **Concurrency Scaling** Change the number of clusters `ALTER ENGINE MyEngine SET CLUSTERS = 2;` + +You can scale up or down using the engine type, scaling out or in with number of nodes and add or remove clusters for concurrency scaling. This multidimensional scaling allows you to fine-tune the price-performance characteristics of engines and dynamically scale your compute resources based on your workload requirements. 
+ +Use the [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html) command to modify the configuration of an engine to dynamically scale the engine even while it is running, without impacting the workload. + +**Best practices** + +- Use a **larger node type** to improve performance for both single queries and multiple concurrent queries, especially as data size grows. +- Increase the number of nodes for finer control over scaling, such as distributing workloads across multiple smaller nodes or when further vertical scaling is not possible. +- Increase the **number of clusters** to support higher query concurrency. + +The following code example uses `ALTER ENGINE` to horizontally scale an engine from two to three nodes: + +``` +ALTER ENGINE MyEngine SET NODES = 3; +``` + +The following code example changes the type of node used in an engine from ‘M’ to ‘L’: + +``` +ALTER ENGINE MyEngine SET TYPE = L; +``` + +The following code example changes more than one attribute at the same time: + +``` +ALTER ENGINE MyEngine SET NODES = 3 TYPE = L; +``` + +For more information on modifying engines, see [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html). + +## [](#connecting-to-engines)Connecting to engines + +You can connect to an engine using the following methods: + +- Firebolt’s [user interface](https://go.firebolt.io/login). +- An engine URL. +- Third-party [connectors](/Guides/integrations/integrations.html) such as Airflow and DBT. + +The engine URL is based on your account name and org name, with the following format: + +`<account-name>.<org-name>.region.firebolt.io` + +The combined length of `account-name` and `org-name` must not exceed 62 characters. + +## [](#monitoring-engine-usage)Monitoring engine usage + +You can use the observability views in `information_schema` to track engine performance and usage. + +View Description `engine_metrics_history` Captures CPU and RAM usage every **30 seconds** and retains data for **30 days**.
`engine_running_queries` Lists active queries and queries waiting to be processed. + +You can use the information in the previous [information\_schema views](/sql_reference/information-schema/) to decide whether you need to change the engine configuration type, number of nodes or clusters based on your workload needs. + +The [engine\_metrics\_history](/sql_reference/information-schema/engine-metrics-history.html) view gathers engine resource utilization metrics such as CPU and RAM consumption at a given time snapshot. Utilization snapshots are captured every 30 seconds and retained for 30 days, allowing users to understand engine utilization and consumption trends. + +The following code example retrieves CPU and RAM usage for `MyEngine`: + +``` +SELECT * +FROM information_schema.engine_metrics_history +WHERE engine_name = 'MyEngine' +ORDER BY event_time DESC; +``` + +The [engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) view exposes information about queries currently running or waiting to be run in the system. Based on the number of queries that are queued and waiting to run, you can modify the engine configuration to best suit your performance requirements. + +The following code example retrieves currently running queries: + +``` +SELECT * +FROM information_schema.engine_running_queries; +``` + +If the previous query shows that queries remain in the queue for too long, increase the number of nodes or clusters. + +To understand how information views can help with engine resizing, see [Working with Engines](/Guides/operate-engines/sizing-engines.html). + +## [](#engine-governance-and-security)Engine governance and security + +Firebolt provides **account-level isolation** and **Role Based Access Control (RBAC)** to provide strict governance over data access and infrastructure costs. 
+ +**Account isolation** + +You can create multiple accounts within a given organization, where each account can represent a fully isolated environment such as development, test, or production. This enables engines across different environments to be fully isolated from each other. + +**RBAC for engine management** + +The Firebolt [RBAC model](/Guides/security/rbac.html) allows administrators to control user actions on resources that are created within a given account. For example, administrators can control which users are allowed to modify the configuration of engines and control which users can create new engines. + +The following code example creates an engine administrator role and grants it full permissions on `MyEngine`: + +``` +CREATE ROLE engine_admin; +GRANT ALL PRIVILEGES ON ENGINE MyEngine TO engine_admin; +``` + +For more information on using RBAC for engines, see [Governing Engines](/Guides/operate-engines/rbac-for-engines.html). + +## [](#viewing-and-understanding-engine-status)Viewing and understanding engine status + +Use [SHOW ENGINES](/sql_reference/commands/metadata/show-engines.html) to list all engines and their statuses in your Firebolt account as follows: + +`SHOW ENGINES` and UI Description STARTING The engine is provisioning resources, and will be ready to use soon. RUNNING The engine is running queries or available to run queries. The engine can be modified while it is running. RESIZING The engine is currently being resized after an `ALTER ENGINE` command. The engine will be in this state when the user has issued a request to change the engine `TYPE`, number of `NODES` or number of `CLUSTERS`. DRAINING The engine is completing running queries before shutting down. STOPPING The engine is shutting down and cannot accept new queries. STOPPED The engine is fully stopped and not available to run queries. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_indexes_aggregating_index.md b/cmd/docs-scrapper/fireboltdocs/overview_indexes_aggregating_index.md new file mode 100644 index 0000000..ae7310d --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_indexes_aggregating_index.md @@ -0,0 +1,75 @@ +# [](#aggregating-index)Aggregating index + +Firebolt’s aggregating index is a powerful tool for improving performance in large-scale analytics. Similar to a materialized view, it precomputes and stores the results of aggregate functions, allowing queries to access data directly from the index instead of scanning the entire table. This approach greatly reduces compute overhead and speeds up query times, making it especially useful for repetitive tasks like generating dashboards and reports. + +Aggregating indexes are automatically updated in real-time whenever new data is added or when changes, such as `DELETE` or `UPDATE` operations, are made to the base table. Firebolt also enhances performance by dividing aggregating indexes into smaller segments that are distributed across multiple engine nodes. This allows queries to run in parallel across these nodes, improving both scalability and efficiency while ensuring that the data remains consistent and accurate. + +Topics: + +- [Aggregating index](#aggregating-index) + + - [Key features](#key-features) + - [Syntax](#syntax) + + - [Parameters](#parameters) + - [Example](#example) + - [Considerations](#considerations) + +## [](#key-features)Key features + +- **Automatic synchronization** + + The aggregating index is automatically updated in real-time to reflect changes in the base table after each transaction including **DELETE** or **UPDATE** operations. +- **Automatic `COUNT(*)` aggregations** + + A `COUNT(*)` aggregation is automatically added to every aggregating index unless explicitly specified by the user. 
This ensures the index can accurately handle `DELETE` operations by tracking the number of rows affected. The aggregating index adjusts its precomputed results to remain synchronized with the base table, maintaining consistency and ensuring accurate query results. +- **Primary index deduction** + + The primary index for a Firebolt table is established based on the order of the **GROUP BY** keys specified during the creation of an aggregating index. Firebolt physically organizes the data according to these keys, which align with how queries group and aggregate the data. This optimization enables effective data pruning and reduces scan times, making data retrieval more efficient. + +## [](#syntax)Syntax + +``` +CREATE AGGREGATING INDEX <index_name> +ON <table_name> ( + <grouping_element> [, ...], + <aggregate_expression> [, ...] +); +``` + +### [](#parameters)Parameters + +Parameter Description `<index_name>` The name of the aggregating index. `<table_name>` The name of the table on which the index is created. `<grouping_element>` Expressions specified as grouping keys or dimensions when creating the index. `<aggregate_expression>` Aggregation functions applied to specific expressions. + +## [](#example)Example + +The following example creates an aggregating index `sales_agg_index` on the `sales` table and precomputes the `SUM` and `COUNT` aggregations: + +``` +CREATE AGGREGATING INDEX sales_agg_index +ON sales(product_id, region, SUM(sales_amount), COUNT(DISTINCT order_id)); +``` + +In the previous code example, `product_id` and `region` are grouping keys that are grouped together as `grouping_element`. These keys define how the data is grouped for aggregation, similar to the `GROUP BY` clause in a SQL query. Subsequent queries using these aggregations can retrieve the precomputed values directly from the index, avoiding a full table scan. + +Because the code example precomputes `SUM` and `COUNT`, subsequent queries using these aggregations can retrieve the precomputed values directly from the index, avoiding a full table scan.
+ +## [](#considerations)Considerations + +**Ingestion Overhead** + +Maintaining an aggregating index adds processing overhead during data loading, which can slow down Data Manipulation Language (DML) operations such as `INSERT`, `DELETE`, and `UPDATE` on the base table. + +**Vacuuming** + +To ensure optimal query performance, you should periodically **vacuum** the aggregating index. [VACUUM](/sql_reference/commands/data-management/vacuum.html) helps to: + +- Defragment data in the aggregating index table. +- Remove stale or deleted items from disk after `DELETE` operations on the base table. + +Regular vacuuming can improve query performance, especially for the following: + +- **Frequent batch inserts** – These inserts can lead to fragmented data in the aggregating index table. +- **Base table mutations** – Operations like `DELETE` or `UPDATE` on the base table can also cause fragmentation, impacting query performance. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_indexes_primary_index.md b/cmd/docs-scrapper/fireboltdocs/overview_indexes_primary_index.md new file mode 100644 index 0000000..d6d3038 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_indexes_primary_index.md @@ -0,0 +1,122 @@ +# [](#primary-index)Primary index + +The Firebolt primary index optimizes data retrieval by organizing it based on column values. This enables efficient data pruning and high-performance queries for large-scale analytics. The primary index ensures that queries target only the most relevant portions of the data, significantly reducing the volume of unnecessary scans. This selectivity is especially powerful when the indexed columns align closely with query patterns, allowing the database to quickly locate and retrieve the required data. As a result, query performance is not only optimized but also remains consistent even as data volumes grow. 
+ +Topics: + +- [Key features](#key-features) +- [Syntax](#syntax) +- [Parameters](#parameters) +- [Example](#example) +- [Considerations](#considerations) +- [Advanced option: index granularity](#advanced-option-index-granularity) + +## [](#key-features)Key features + +- **Customizable indexing**: + + - Unlike traditional databases, Firebolt allows primary indexes on any column, so that you have the flexibility to align with query patterns. + - Primary key constraints are not enforced, allowing for greater customization. +- **Tablet-based data organization**: + + - Data is divided into tablets, which are self-contained, fixed-size data chunks approximately 3GB each, sorted by primary index keys for efficient querying. + - Within each tablet, your data is further segmented into intra-tablet ranges, approximately 8,000 rows each, for faster access. +- **Support for compound indexes**: + + - Multiple columns can be included in the primary index, optimizing complex query patterns by allowing efficient pruning and retrieval for queries that filter or group data based on combinations of these columns, such as filtering by both a date range and a category or joining fact and dimension tables on multiple keys. +- **Sparse indexing**: + + - Firebolt employs a sparse indexing approach, storing only the first row of each tablet range. This approach allows target reads and parallel processing, significantly reducing index size while increasing query performance. +- **Automatic metadata updates**: + + - The primary index is automatically maintained during data inserts, deletions, and updates ensuring optimal performance. +- **Handling low-cardinality clauses**: + + - Starting the primary index with low-cardinality columns can enhance pruning efficiency by creating long ordered runs of data. Low-cardinality columns are those with a limited number of unique values such as months, regions, or statuses, which group similar rows together. 
This reduces the number of tablet ranges Firebolt needs to scan, enabling faster query execution and better data pruning. +- **Inclusion of join key columns**: + + - In star schema designs, including join key columns, or foreign keys, in the primary index of fact tables can accelerate queries by facilitating efficient data retrieval. +- **Leverage indexed columns directly**: + + - Design queries to use indexed columns directly in WHERE clauses without transformations, ensuring that the primary index can be utilized to its full potential for faster data pruning and query execution. + +## [](#syntax)Syntax + +To define a primary index, use the following syntax within a `CREATE TABLE` statement: + +``` +CREATE TABLE ( + + [, , + ...] +) +PRIMARY INDEX [, , ...] +[WITH ( index_granularity = ) ]; +``` + +## [](#parameters)Parameters + +Parameter Description `table_name` The name of the table where the primary index is applied. `column_name1, ...` The columns chosen to be included in the primary index. `index_granularity` The maximum number of rows in each granule. See [Index granularity](#advanced-option-index-granularity). + +## [](#example)Example + +The following example creates a table with a primary index optimized for query performance by filtering for `SubmitDate` and `EngineName`: + +``` +CREATE [FACT|DIMENSION] TABLE QueryHistory ( + QueryID TEXT, + QueryText TEXT, + SubmitDate DATE, + EngineName TEXT, + SubmitTime DATE, + Latency INT +) +PRIMARY INDEX SubmitDate, EngineName; +``` + +## [](#considerations)Considerations + +- **Non-enforced primary key constraint**: + Firebolt does not enforce primary key constraints, so users must manage data integrity externally. +- **Managing fragmentation**: + Fragmentation can occur as you insert, delete, or update data in a table, which impacts storage efficiency and potentially affects your query performance. 
Firebolt provides tools to help mitigate this effect: + + - **Efficient deletion management**: + Instead of immediately removing rows from the table, Firebolt uses a deletion mask vector to flag rows as deleted. This vector marks rows for exclusion during queries while keeping the underlying data intact until cleanup is performed. + This approach ensures consistency and avoids disrupting the primary index during updates or deletions. + - **fragmentation metric**: + Use the `information_schema.tables` to access the fragmentation metric to assess fragmentation levels and determine whether maintenance actions are needed. + - **[VACUUM](/sql_reference/commands/data-management/vacuum.html) command**: + You can use the `VACUUM` command to clean up rows flagged for deletion and reorganize fragmented data. It is particularly useful when large numbers of rows have been deleted or updates have introduced significant fragmentation. +- **Query Performance Overhead**: + While sparse indexes enable targeted reads and parallel processing to improve query performance, they may still require scanning one tablet range from multiple tablets, even for highly selective filters. This can result in more data being scanned compared to a globally sorted index, potentially affecting performance in certain scenarios. +- **Column Selection**: + Choose columns with high selectivity and relevance to query patterns for optimal performance. **Selectivity** refers to the ability of a column to significantly narrow down the dataset when filtered, typically measured by the proportion of unique values in the column. Columns with higher selectivity, such as IDs or timestamps, help reduce the number of rows scanned, leading to faster query execution and better resource efficiency. + +Using Firebolt’s primary indexes can help you enhance your query performance, optimize data management, and scale efficiently for modern analytics workloads. 
+ +## [](#advanced-option-index-granularity)Advanced option: index granularity + +The `index_granularity` [storage parameter](/sql_reference/commands/data-definition/create-fact-dimension-table.html#storage-parameters), specified in the `WITH` clause, is an advanced setting that may be useful for improving performance in very specific query patterns. It defines the maximum number of rows per granule, which directly impacts how data is indexed and queried. + +### [](#how-index-granularity-works)How index granularity works + +A granule is the smallest block of rows that Firebolt can skip or read during query filtering. Index granularity defines the number of rows in each granule. In other words, it sets the smallest group of rows the engine can access independently. + +- **Lower index granularity** creates smaller granules, allowing more precise filtering and reducing unnecessary row scans in selective queries. However, lower index granularity also increases memory usage and overhead from managing more granules. +- **Higher index granularity values** creates larger granules, lowering memory usage and management overhead but increasing the chance of scanning irrelevant rows, especially in selective queries. + +For more information about the fundamentals of Firebolt’s primary indexes and granules, see Firebolt’s blog post on [primary indexes](https://www.firebolt.io/blog/primary-indexes-in-firebolt-a-comprehensive-guide-to-understanding-managing-and-selecting). + +### [](#accepted-values)Accepted values + +`` must be a power of 2, ranging from 128 to 8192. The default value is 8192. We recommend using the default value, but lower values can decrease query latency by 10x or more in some query patterns. + +### [](#best-practices)Best practices + +Use the default value of `index_granularity`, which should translate to good performance for most queries. 
The following workload patterns may benefit from higher or lower values for `index_granularity`: + +- If your queries access only a few rows per granule, such as single-row queries or individual rows spread throughout a table, setting a **lower** `index_granularity` value can reduce unnecessary row scans and improve efficiency. However, this increases static memory usage for storing the index. +- If most of your queries scan large portions of the table, such as a large bounded range of primary index columns, a **higher** `index_granularity` value is more efficient, as it reduces index memory usage and overhead introduced by each granule boundary. + +If you want to adjust `index_granularity`, start with the default value, then create duplicate tables with different settings to compare both the query latency and memory usage. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_indexes_using_indexes.md b/cmd/docs-scrapper/fireboltdocs/overview_indexes_using_indexes.md new file mode 100644 index 0000000..f358d8f --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_indexes_using_indexes.md @@ -0,0 +1,446 @@ +# [](#data-modeling)Data modeling + +Firebolt optimizes data storage and retrieval by using indexing, partitioning, and compute scaling to enhance query efficiency. Design your data model and use databases, tables, and indexes to optimize query performance. + +- **Minimizing data scans** + Firebolt indexes retrieve only the specific data ranges needed to satisfy a query, reducing the amount of data scanned. +- **Automatic index maintenance** + During data loading, Firebolt automatically sorts, compresses, and incrementally updates indexes to keep them optimized. +- **Tablet-based optimization** + Data and indexes are committed as tablets, which Firebolt automatically merges and optimizes as the data evolves. 
+- **Best practices for performance** + This guide provides recommendations for organizing tables, indexes, and data to achieve fast query results and peak performance. + +## [](#how-it-works)How it works + +Firebolt’s indexing and partitioning strategies are designed to take advantage of a cloud-based architecture that scales to handle very large data sets. Data is queried using multiple nodes for parallel processing. Data is also stored by columns, which allows for: + +- **Optimized read operations** +- **Less disk space for storage** +- **Vectorized processing** + +Firebolt also separates compute resources from storage resources so you can scale either up or down depending on your use case. Optimize your resources based on your changing workloads, and pay only for what you use. + +Firebolt’s data modeling strategies work best with **Firebolt’s managed tables**, which leverage Firebolt’s performance optimizations. External tables, which allow users to access data without loading it into Firebolt, provide flexibility for querying external data sources like Amazon S3, but they do not offer the same performance benefits as managed tables. + +The following sections show you how to use the previous data modeling strategies to decrease the number of bytes scanned to improve query performance, reduce storage costs, and optimize compute resources. 
+ +Topics: + +- [Data modeling](#data-modeling) + + - [How it works](#how-it-works) + + - [Databases](#databases) + + - [Create a database](#create-a-database) + - [Manage a database](#manage-a-database) + - [Database best practices](#database-best-practices) + + - [Database best practice example](#database-best-practice-example) + - [Evaluate your database for performance](#evaluate-your-database-for-performance) + - [Schema](#schema) + + - [Schema best practices](#schema-best-practices) + - [Tables](#tables) + + - [Firebolt-managed tables](#firebolt-managed-tables) + - [External tables](#external-tables) + - [Editing and deleting tables](#editing-and-deleting-tables) + - [Primary indexes in tables](#primary-indexes-in-tables) + + - [Create a primary index](#create-a-primary-index) + - [Primary index best practices](#primary-index-best-practices) + - [Aggregating indexes in tables](#aggregating-indexes-in-tables) + + - [Create an aggregating index](#create-an-aggregating-index) + - [Best practices for aggregate indexes](#best-practices-for-aggregate-indexes) + - [Partitions in tables](#partitions-in-tables) + + - [Create a partition](#create-a-partition) + - [Suggested indexes and partitions](#suggested-indexes-and-partitions) + - [Additional resources](#additional-resources) + +* * * + +### [](#databases)Databases + +**Logical structure** +In Firebolt, a database is a logical structure that organizes and stores data for efficient querying and management. Databases are created using the `CREATE DATABASE` statement and can be modified or deleted using `ALTER` and `DROP`. + +**Compute and storage separation** +Firebolt separates compute and storage layers, ensuring databases are efficiently managed without impacting processing power. + +**Data Security** +Databases integrate with Firebolt’s data security layers, enforcing access controls and user permissions to safeguard sensitive information. 
+ +**Performance and integration** + +- **Fast analytics**: Firebolt databases enable quick querying of large datasets, supporting fast analytics. +- **Third-Party integration**: They are easily integrated with third-party tools through APIs and drivers for programming languages like Python and .NET. + +Database topics: + +- [Create a database](#create-a-database) – Use a system or user engine to create a database. +- [Manage a database](#manage-a-database) – How to edit and delete databases. +- [Database best practices](#database-best-practices) – How to organize your databases for the best performance. +- [Evaluate your database for performance](#evaluate-your-database-for-performance). + +#### [](#create-a-database)Create a database + +Use [CREATE DATABASE](/sql_reference/commands/data-definition/create-database.html), which requires only the name of the database, and an **engine** to create a database. + +Firebolt provides two types of engines: + +- **System engines** handle administrative tasks like creating and managing databases and tables without any need for configuration. They are always available but cannot process user data. **User engines** perform both administrative tasks and handle queries that access and process user data. User engines provide the flexibility needed for data processing and analysis. + +The following code example creates a `test_library` database with an optional description: + +``` +CREATE DATABASE IF NOT EXISTS test_library + WITH DESCRIPTION = 'database about library'; +``` + +#### [](#manage-a-database)Manage a database + +After you create a database, you can create additional objects and run queries within it. You can also modify the database parameters. 
+ +The following code example modifies the description of a database: + +``` +ALTER DATABASE library SET DESCRIPTION = 'database about library'; +``` + +You can delete a database with [DROP DATABASE](/sql_reference/commands/data-definition/drop-database.html), which permanently deletes the database and all its contents. Since this action cannot be undone, use it carefully. Create a backup before dropping the database to prevent data loss. + +The following code example uses the optional `IF EXISTS` clause to see if the `test_library` database exists, and then deletes it: + +``` +DROP DATABASE IF EXISTS test_library; +``` + +Active queries will continue using the latest version of the database, but new queries submitted after the database is dropped will fail. To minimize disruptions, monitor active queries and notify users in advance. + +#### [](#database-best-practices)Database best practices + +Efficient database design in Firebolt is key to optimizing query performance, managing scalability, and ensuring workload isolation. By using databases to logically separate data, workloads from each other. You can reduce the impact of schema changes, minimize query latency, and enhance access control through role-based policies. For example, when managing data for applications that operate across multiple regions, understanding when to use a single database versus separate databases can significantly affect performance and operational efficiency. The following best practices outline how to structure databases in Firebolt to achieve these goals: + +- **Use databases to logically separate workloads** – Organize data into separate databases to isolate workloads and reduce the impact of schema changes on unrelated queries. +- **Group related tables in the same database** – Only include related tables within the same database to minimize the overhead caused by metadata synchronization during schema changes. 
+- **Separate data for different regions or use cases** – For multi-region applications, create separate databases for each region to avoid conflicts such as overlapping primary keys, and to prevent schema changes in one region from affecting query performance in another. +- **Minimize the number of tables in a single database** – Avoid adding too many unique tables to a single database, as schema changes require the metadata service to synchronize all associated schemas, potentially increasing query latency. +- **Leverage logical separation for access control** – Use separate databases to enforce role-based access control policies, ensuring users can only access data relevant to their roles and restricting queries across databases. +- **Consider metadata sync impacts on query latency** – Metadata synchronization can introduce slight query latencies after schema changes, especially as the number of tables in a database grows. + +##### [](#database-best-practice-example)Database best practice example + +The maker of the fictional [UltraFast game](https://help.firebolt.io/t/ultra-fast-gaming-firebolt-sample-dataset/250) plans to expand into a second region, where the game will generate data with the same structure and format as the first region but potentially overlapping primary keys. + +The developers have two options: + +1. Store the new region’s data in the existing database. +2. Create a separate database specifically for the new region. + +**Recommended solution** Create a separate database for the new region for the following reasons: + +- **Optimal Query Performance** – Firebolt’s decoupled architecture requires metadata synchronization for schema changes. As the number of tables in a database grows, this synchronization can introduce slight query latency. Storing data for each region in separate databases eliminates this risk and ensures efficient query performance. 
+- **Improved data isolation** – Queries for one region are not impacted by schema changes in another. Separate databases allow role-based access control, restricting access to data by region, which enhances security and operational control. + +By creating a dedicated database for each region, the developers ensure optimal performance, scalability, and data management tailored to the needs of the UltraFast game expansion. + +#### [](#evaluate-your-database-for-performance)Evaluate your database for performance + +You can use the code in the [Firebolt DDL Performance GitHub repository](https://github.com/wagjamin/firebolt-ddl-performance/tree/main) to measure the efficiency of your database design. Use the code to generate a set of pdf plots for latency to evaluate your choices including the following: + +- How increasing the number of tables impacts schema synchronization times. +- Analyze whether separating unrelated tables into different databases improves performance. +- Validate the benefits of isolating data for different use cases or regions in separate databases. +- Understand the trade-offs of schema changes on query latency and how to mitigate them through better database organization. + +* * * + +### [](#schema)Schema + +A schema serves as a blueprint for how data is organized within a database, acting as a logical framework that groups tables, views, and indexes. Understanding how to structure and manage schemas is crucial for maintaining an organized database environment, making it easier to optimize queries and manage access. While schemas don’t directly improve query performance, the way you organize your data within them can significantly impact how efficiently queries run, especially as your data grows. 
+ +#### [](#schema-best-practices)Schema best practices + +To enhance query performance, simplify access control, and ensure scalability as your data grows, use the following best practices: + +- **Logical Organization** – Grouping related tables, views, and indexes within a schema, to create a logical structure that makes it easier to navigate and optimize your queries. Well-organized data allows for clearer queries, reducing complexity and improving readability. +- **Simplified Access Control** – Using schemas to manage access to certain tables or views ensures that only relevant data is queried by specific users or roles. This prevents unnecessary data scans or joins, which can improve query execution time. +- **Scalability** – As your database grows, a well-structured schema becomes even more important. Organizing data in a way that scales efficiently prevents bottlenecks, ensuring that queries can continue to run quickly even with larger datasets. + +* * * + +### [](#tables)Tables + +In Firebolt, tables are the key components for organizing and storing data. They consist of rows (records) and columns (attributes), making them an integral tool in data modeling. Firebolt supports two main types of tables: + +Table topics: + +- [Firebolt-managed tables](#firebolt-managed-tables) – Fast and dimension tables make the best use of Firebolt’s optimization strategies. +- [External tables](#external-tables) – Users can access and query data without loading it into Firebolt. External tables generally have poorer performance compared to Firebolt-managed tables. +- [Editing and deleting tables](#editing-and-deleting-tables) – You can edit and delete an existing table. +- [Primary indexes in tables](#primary-indexes-in-tables) – Select the most efficient primary index for your tables based on your query patterns and data characteristics. 
+- [Aggregating indexes in tables](#aggregating-indexes-in-tables) – Precompute and update aggregation results for fast calculations and to save compute resources. +- [Partitions in tables](#partitions-in-tables) – Organize data based on date ranges, regions, customer types or other categories. +- [Suggested indexes and partitions](#suggested-indexes-and-partitions) – Use Firebolt’s [RECOMMEND\_DDL](/sql_reference/commands/queries/recommend_ddl.html) tool to suggest indexes and partitions based on your query history. + +#### [](#firebolt-managed-tables)Firebolt-managed tables + +Tables that are fully controlled by Firebolt make the best use of Firebolt’s optimization strategies. There are two types of managed tables, which serve different but complementary purposes: + +- **Fact tables** – store large volumes of quantitative data, such as sales metrics or user interactions, and are used for analysis and reporting. +- **Dimension tables** – which hold descriptive data for enriching analyses, such as customer details or product categories. These tables are often duplicated across all nodes, and can dramatically improve query performance by providing fast access to frequently referenced information. + +By designing tables with both fact and dimension roles, users can optimize data management for reporting and analytics. + +**Fact tables** + +The following code example creates a **fact table**: + +``` +CREATE TABLE borrowedbooks ( + transaction_id INT, + book_id INT, + member_id INT, + checkout_date DATE, + return_date DATE, + late_fees NUMERIC +); +``` + +The internal structure of a fact table facilitates fast access to data even for very large datasets. + +**Dimension tables** + +Dimension tables are often replicated across all nodes, which enhances query performance and ensures quick access to frequently referenced information. 
+ +The following code example creates a **dimension table**: + +``` +CREATE DIMENSION TABLE books ( + book_id INT, + title TEXT, + author TEXT, + genre TEXT, + publication_year INT +); +``` + +In the previous example, the `books` table stores details about the publication and is ideal for joining with fact tables like `borrowedbooks` to analyze borrowing patterns or book popularity. + +#### [](#external-tables)External tables + +External tables allow users to access and query data stored outside the database, such as in Amazon S3, without loading it into Firebolt. This capability is particularly useful when working with large datasets stored externally that don’t require frequent access, enabling cost-efficient querying and avoiding data duplication. External tables generally have poorer performance compared to Firebolt managed tables because the data is not stored within Firebolt’s optimized infrastructure. + +#### [](#editing-and-deleting-tables)Editing and deleting tables + +After you create a table, you can modify your table ownership, delete it including all of its dependencies. Use the following SQL commands to manage your table: + +**Change table ownership** + +To change the ownership of a table, use the ALTER TABLE statement with the OWNER TO clause. + +The following command assigns a new owner to the `borrowedbooks` table: + +``` +ALTER TABLE borrowedbooks OWNER TO 'new_owner'; +``` + +**Delete a managed table** + +Dropping a Firebolt managed table permanently deletes it along with all its data. + +The following code checks to see if the `borrowedbooks` table exists and then deletes it: + +``` +DROP TABLE IF EXISTS borrowedbooks; +``` + +**Delete a managed table that has dependencies** + +If the table that you want to delete has dependent objects such as views or aggregating indexes, you can use the `CASCADE` option to remove all dependencies together. 
+ +The following code deletes the `borrowedbooks` table and all related objects: + +``` +DROP TABLE borrowedbooks CASCADE; +``` + +**Drop an external table** + +If you drop an external table, you only remove its definition from Firebolt. The actual data remains in the external source. The following code drops the `external_books` external table: + +``` +DROP TABLE external_books; +``` + +* * * + +#### [](#primary-indexes-in-tables)Primary indexes in tables + +Primary indexes optimize query performance by logically organizing data for efficient access based on key columns, using sparse indexing to minimize unnecessary scans during queries. This allows Firebolt’s engine to prune unnecessary data during queries, minimizing the amount of data read from disk, and accelerating query execution. By selecting columns frequently used in filters, the primary index enables fast, efficient data retrieval, which is crucial for large-scale, data-intensive applications. Properly configuring primary indexes ensures that Firebolt can maintain high performance, even with complex queries and large datasets. + +**How Firebolt uses a primary index to optimize performance** + +When new data is inserted into a table, Firebolt organizes it into tablets, which are logical partitions of the table. Each tablet holds a portion of the data, sorted according to the primary index. During query processing, Firebolt uses these indexes to eliminate blocks of rows not matching query predicate and scan only the necessary data, minimizing input and output operations, and optimizing performance. + +For updates, Firebolt follows a “delete-and-insert” approach: the original row is marked for deletion, and the updated row is inserted into a new tablet. Deleted rows are not removed immediately but are flagged and later cleaned up during maintenance tasks. 
+ +##### [](#create-a-primary-index)Create a primary index + +A primary index can only be defined when creating a new table, so if you need to modify the index, you’ll have to create a new table. To define a primary index, use the `PRIMARY INDEX` clause to specify your columns, as shown in the following example: + +``` +CREATE TABLE borrowedbooks_pi( + transaction_id INT, + book_id INT, + member_id INT, + checkout_date DATE, + return_date DATE, + late_fees NUMERIC) +PRIMARY INDEX (book_id, checkout_date); +``` + +In the previous example, [CREATE TABLE](/sql_reference/commands/data-definition/create-fact-dimension-table.html) creates a fact table by default, and the primary index acts as a composite index of both the `book_id` and `checkout_date` columns. + +You can also create a primary index on a dimension table as shown in the following code example: + +``` +CREATE dimension TABLE books_pi( + book_id INT, + title TEXT, + author TEXT, + genre TEXT, + publication_year INT +) +PRIMARY INDEX (book_id, title); +``` + +The primary index should include columns that are often used in queries that filter or aggregate data, including those that use `WHERE` and `GROUP BY` clauses, so that Firebolt can use these columns for efficient retrieval. Firebolt physically clusters the data in the table by the columns of the primary index. The primary index significantly improves query performance because it reduces the time spent scanning irrelevant rows and allows the query engine to retrieve only the necessary data. + +##### [](#primary-index-best-practices)Primary index best practices + +To optimize query performance and maximize indexing efficiency, use the following best practices for designing composite primary indexes: + +- **Column selection** – Carefully select which columns to include in a composite primary index based on query patterns, cardinality, and the specific use cases for your table. Adding unnecessary columns can negatively impact performance. 
+- **Column order** – The order of columns in a composite primary index is critical. Place the most selective, or highest cardinality columns first in a composite primary index. Firebolt will search based on the order of columns in the primary index. High-cardinality columns, which have many distinct values, improve index efficiency by reducing the number of rows that need to be scanned during queries. + +#### [](#aggregating-indexes-in-tables)Aggregating indexes in tables + +Aggregating indexes precompute and store aggregation results from aggregation functions like `SUM`, `COUNT`, and `AVG`, as well as more complex calculations. When the underlying data is changed, Firebolt recalculates aggregate indexes automatically, so that they are always updated when you query them. Firebolt handles the following scenarios: + +- Inserts – When new rows are added to a table included in an aggregating index, Firebolt recalculates the value to include the new data. +- Updates – If existing rows are updated, Firebolt adjusts the aggregation values to reflect the changes. +- Deletions – When rows are deleted, the aggregating index is recalculated to remove the row from the precomputed aggregated values. + +When you utilize an aggregating index through a query, Firebolt utilizes the pre-calculated values instead of computing them in real-time. This reduces the computational burden at runtime and significantly speeds up query response times, especially for large datasets with high concurrency demands. Aggregating indexes are especially useful for frequently run queries that involve repeated aggregation operations, such as calculating totals, averages, or counts. + +Aggregating indexes do require additional storage, because the precomputed data needs to be maintained. In write-heavy environments, frequent updates, inserts, or deletes can lead to increased computational overhead as the indexes must be recalculated and kept up to date. This can result in higher compute costs. 
It’s important to consider the performance benefits of faster queries against the additional storage and processing costs, especially for frequently changing datasets. + +##### [](#create-an-aggregating-index)Create an aggregating index + +Aggregating indexes can be created at the time a new table is made or afterward. You can define it as needed based on query patterns and performance. + +The following code example shows how to create an aggregating index to precompute the number of transactions per borrower and their average late fee on the existing `borrowedbooks` table created in the previous **Fact tables** section in [Firebolt managed-tables](#firebolt-managed-tables): + +``` +CREATE AGGREGATING INDEX agg_borrower_statistics + ON borrowedbooks ( + borrower_id, + COUNT(transaction_id), + AVG(late_fee) + ); +``` + +When Firebolt runs a query that accesses either `transaction_id` or `late_fee`, it retrieves the precomputed results from the aggregating index, rather than computing them. + +The following code example shows how to create an aggregating index to precompute the total late fees accumulated for each book: + +``` +CREATE AGGREGATING INDEX agg_total_late_fees + ON borrowedbooks ( + book_id, + SUM(late_fee) + ); +``` + +Additionally, aggregating indexes integrate seamlessly with Firebolt’s partitioning strategies, further improving query performance by allowing the query engine to access only the relevant partitions. This reduces the amount of data scanned and processed, particularly when dealing with large, partitioned datasets. The combination of partition pruning and aggregate indexing helps achieve superior query performance in data-intensive environments, allowing for quicker insights and more efficient use of system resources. 
+ +##### [](#best-practices-for-aggregate-indexes)Best practices for aggregate indexes + +To optimize query performance and manage resources effectively, follow these best practices for creating and maintaining aggregating indexes: + +- **Focus on frequently run queries** – Create aggregating indexes for queries that use frequent aggregations including `SUM`, `COUNT`, and `AVG` that are run repeatedly. +- **Choose columns used in aggregations** – Select columns commonly used in GROUP BY and aggregation functions such as `borrower_id` or `book_id` from the examples in this section. +- **Choose columns strategically** – Build indexes on columns that are frequently queried for aggregations such as `customer_id` or `order_id` from the examples in this section. Avoid creating multiple indexes with overlapping aggregations on the same columns to minimize unnecessary overhead and costs. +- **Consider Data Freshness** – Ensure the performance gains of precomputed values outweigh the index maintenance costs for frequently changing data. + +#### [](#partitions-in-tables)Partitions in tables + +Firebolt’s table partitioning supports efficient data lifecycle management by organizing data based on specific criteria: date ranges, regions, product categories, or customer types. This helps streamline data retention, archiving, and access, ensuring optimal performance. Partitioning reduces the amount of data scanned during queries, improving speed, particularly when query patterns consistently filter data by columns like date or region. It also enables data pruning, skipping partitions that don’t match the query criteria, making tasks like vacuuming and archiving more efficient. + +##### [](#create-a-partition)Create a partition + +You can create a partition for a new table; however, it’s recommended to define the partition when creating a new table to ensure the data is organized from the start. 
+ +The following code example **creates a new table** with a primary index, and partitions the table by month: + +``` +CREATE TABLE librarybooks ( + transaction_id INT, + book_id INT, + borrower_id INT, + checkout_date DATE, + due_date DATE, + return_date DATE, + late_fee DECIMAL(10, 2) +) +PRIMARY INDEX transaction_id +PARTITION BY DATE_TRUNC('month', checkout_date); +``` + +In the previous example, the new `librarybooks` table is partitioned by the month of `checkout_date`, so that Firebolt can prune irrelevant partitions and speed up queries for specific date ranges. + +You can **drop** an existing partition as shown in the following code example, which removes a partition for January 1, 2023: + +``` +ALTER TABLE borrowedbooks +DROP PARTITION '2023-01-01'; +``` + +#### [](#suggested-indexes-and-partitions)Suggested indexes and partitions + +If you understand your data and query patterns, you should select high-cardinality columns frequently used in `WHERE`, `JOIN`, or `GROUP BY` clauses to minimize the amount of data scanned and improve query efficiency. + +If you don’t know how to effectively select a primary index or partition your data, you can use Firebolt’s [RECOMMEND\_DDL](/sql_reference/commands/queries/recommend_ddl.html) tool to provide automated insights. `RECOMMEND_DDL` will make recommendations to optimize database performance by analyzing your query patterns and suggesting the most efficient configurations for primary indexes and partitioning. By examining historical query data, the tool identifies columns frequently used in filtering, grouping, or aggregating operations and recommends appropriate primary indexes and partition keys. These suggestions help reduce the amount of data scanned during queries, enabling faster execution and improved resource utilization. + +`RECOMMEND_DDL` is particularly useful in complex environments where query patterns evolve over time. 
By reviewing historical query data, Firebolt identifies columns that are frequently used in filtering or aggregation and recommends appropriate primary index and partitioning strategies. + +The following code example uses `RECOMMEND_DDL` to analyze query patterns on the books table, created in the \*\*Dimension tables \** section under [Firebolt managed-tables](#firebolt-managed-tables), based on queries run in the past week: + +``` +CALL recommend_ddl( + books, + (SELECT query_text FROM information_schema.engine_query_history WHERE query_start_ts > NOW() - INTERVAL '1 week') +); +``` + +If the `books` table is frequently queried based on `genre` and `book_id`, Firebolt’s `RECOMMEND_DDL` command might provide the following example output: + +recommended\_partition\_key recommended\_primary\_index average\_pruning\_improvement analyzed\_queries DATE\_TRUNC(‘month’, borrow\_date) book\_id, borrower\_id 0.35 200 + +The example output under `recommended_partition_key` suggests partitioning the `borrowedbooks` table by month based on the `borrow_date` column. The `recommended_primary_index` suggests creating a primary index on the `book_id` and `borrower_id` columns. An average pruning improvement of 0.35 indicates 35% less data will be scanned on average by applying these recommendations. The analyzed queries column shows that 200 queries were analyzed to generate these suggestions. + +### [](#additional-resources)Additional resources + +- [Working with tables](/Overview/indexes/using-indexes.html#firebolt-managed-tables) – An overview of how to create, manage, and optimize tables. +- [Partitions in tables](/Overview/indexes/using-indexes.html#partitions-in-tables) – How to divide large tables into smaller partitions for optimal query efficiency and performance. 
+- [RECOMMEND\_DDL](/sql_reference/commands/queries/recommend_ddl.html) – Information on syntax, parameters and examples of using Firebolt’s tool to automatically recommend optimal primary index and partition strategies. + +* * * + +- [Aggregating index](/Overview/indexes/aggregating-index.html) +- [Primary index](/Overview/indexes/primary-index.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_organizations_accounts.md b/cmd/docs-scrapper/fireboltdocs/overview_organizations_accounts.md new file mode 100644 index 0000000..7e2bc11 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_organizations_accounts.md @@ -0,0 +1,228 @@ +# [](#organizations-and-accounts)Organizations and accounts + +A governance model can help manage cloud data warehouse resources by addressing challenges such as data security, cost management, resource isolation, and observability. For example, development, staging, and production environments often require isolation to prevent unintentional changes in development from affecting production or to restrict developers’ access to only their code and data. Similarly, departments may need isolated access to their resources while limiting access to others. Additionally, governance models can support consolidated billing while providing visibility into consumption by department or development environment. + +To address these requirements, Firebolt supports concepts of organizations and accounts. You can have different accounts within your organization and additionally benefit from consolidated billing, unified authentication, and efficient account management across all accounts. 
+ +**Topics** + +- [Organizations and accounts](#organizations-and-accounts) + + - [Organizations](#organizations) + - [Accounts](#accounts) + + - [Users](#users) + - [Roles](#roles) + + - [Firebolt built-in roles](#firebolt-built-in-roles) + + - [Public role](#public-role) + - [System administrative role](#system-administrative-role) + - [Account administrative role](#account-administrative-role) + - [Organizational administrative role](#organizational-administrative-role) + - [Additional resources](#additional-resources) + + - [Billing](#billing) + - [Security](#security) + - [Available regions](#available-regions) + +The Firebolt object model is hierarchical and comes with strong containment properties in that parent objects can contain one or more child objects. Child objects are sole children of their parent objects and cannot be shared. Furthermore, there are two classes of objects: global and regional. Global objects are managed globally and can contain objects that are deployed and grouped regionally. + +The following Firebolt object model depicts an **organization** at the highest level, with **four** layers directly underneath it: + +![Firebolt's organizational structure has four layers](../../assets/images/manage-organization-org-structure.png) + +## [](#organizations)Organizations + +An organization is a fundamental object in Firebolt, providing a logical structure for managing accounts, billing, and authentication. When registering to Firebolt, the organization name you’ll provide is the same as the domain name you use in your email. Organization names are globally unique. No two organizations can have the same name, but organizations can contain multiple accounts. Each account can contain multiple objects including users, roles, databases, tables, views, and engines. + +In the Firebolt object model, an organization has the following levels: + +- **Login** - an email associated with a user that is used for identification. 
A single login can be associated with users across multiple accounts, but can only be associated with one user per account. For more information about logins, see [Manage logins](/Guides/managing-your-organization/managing-logins.html). +- **Service account** - Use a service account to access Firebolt programmatically through an API. For more information about how to set up and manage a service account, see [Manage programmatic access](/Guides/managing-your-organization/service-accounts.html). +- **Network policy** - Set a network security configuration that controls and restricts network access to specific resources within Firebolt based on IP addresses or IP ranges. See [Manage network policies](/Guides/security/network-policies.html). +- **Account** - A group of resources which can include a database, an engine and several users with associated logins or service accounts and roles. See the following [Accounts](#accounts) section for high-level information about accounts and [Manage accounts](/Guides/managing-your-organization/managing-accounts.html) for more detailed information. Under account are the following levels: + + - **User** - An individual with specific permissions and roles that allow access to and interaction with the database and engine within an account. A user must be associated with a login (or service account), and with an account. See the following [Users](#users) section for more high-level information and [Manage users and roles](/Guides/managing-your-organization/managing-users.html) for more detailed information. + - **Role** - A set of permissions that defines a user’s access and capabilities, which can be assigned to one or more users to manage their privileges. See the following Roles section for more high-level information and Manage users and roles for more detailed information. 
+ - **Database** - A logical collection of schemas and data objects, such as tables and views, that organizes and manages user data and metadata for querying and data processing. For more information about databases see [Create a Database](/Guides/getting-started/get-started-sql.html#create-a-database) in the [Get started using SQL](/Guides/getting-started/get-started-sql.html) guide. Under database are the following levels: + + - **Schema** - A collection of system views containing metadata about objects in the current database, including tables, columns, indexes, and other database components. For more information, see [Information Schema Views](/sql_reference/information-schema/). Under schema are the following levels: + + - **External table** - Tables that store metadata objects that reference files stored in an Amazon S3 bucket, rather than actual data. For more information, see [Work with external tables](/Guides/loading-data/working-with-external-tables.html). + - **Managed table** - A Firebolt-managed internal structured data object within a database that stores rows and columns of data. Firebolt’s managed tables have built-in optimizations for fast query processing times. For more information, see [Tables](/Overview/indexes/using-indexes.html#tables). + - **View** - A virtual table that represents the result of a stored query, including both user-defined views and [information schema views](/sql_reference/information-schema/), which provide metadata about database objects like tables, columns, and indexes. + - **Index** - A database structure that optimizes data retrieval by organizing specific columns, improving query performance and enabling efficient filtering, sorting, and joining of datasets. For more information about indexes, see [Data modeling](/Overview/indexes/using-indexes.html). + - **Engine** - A compute resource that processes queries and manages data operations and can be scaled independently and turned on or off as needed. 
For more information, see [Firebolt Engines](/Overview/engine-fundamentals.html). + +When you [register for the first time](/Guides/managing-your-organization/creating-an-organization.html), Firebolt sets up an organization for you. During registration, you’ll set up your first account, with one user. The first user that is added is the account administrator, as shown in the following diagram: + +![When you first register, Firebolt sets up an organization with one account and user that has account administrator privileges.](../../assets/images/manage-organization-first-registration.png) + +Then, you can add resources and users to this account. The following apply: + +- You can have multiple users within an account. +- A user should be associated with either a login email for personal access or to a service account, for programmatic access. +- You can have multiple accounts in an organization. +- Each account name within an organization must be unique. +- You can’t have one account in multiple AWS regions. +- You can add resources such as databases and engines to the account. + +**Example** + +In the following example structure, an organization has an account set up for their marketing department and for managers in two different AWS regions: + +![An account can span only one AWS region.](../../assets/images/manage-organization-accounts-regions.png) + +In the previous diagram, the organization has three separate accounts: + +- A `marketing_account`, which has access to resources associated with marketing tasks, and two different users. The `user_1` user is associated with a `login_1` linked to an email account. +- A `manager_account_region_1`, which has access to resources associated with a manager account in one AWS region, and one `user_3` that is associated with login\_1 linked to the same email account as `user_1` in marketing\_account. 
+- A `manager_account_region_2`, which has access to resources in a different region than `manager_account_region_1`, and one user also associated with `login_1`. + +The manager of the marketing department, `user_1`, is associated with `login_1`, which is associated with both the marketing account and both manager accounts. These accounts have access to a different set of resources and permissions. The users `user_1`, `user_3`, and `user_4` are all the same person because they have the same login and email. The manager also manages projects across AWS regions and must access those resources in a **different** account. Another employee, `user_2`, works in the marketing department and has access to only the marketing resources designated to `marketing_account` using permissions defined by his role. + +## [](#accounts)Accounts + +An account in Firebolt is an object within an organization that encapsulates resources for storing, querying, and managing data. Accounts provide: + +- **Access control:** Firebolt implements role-based access control (RBAC). Every object in the Firebolt object model is a securable and it comes with a set of privileges. Privileges allow administrators to control functionality Firebolt users can exercise when logged in. +- **Data modeling:** Using objects including databases, tables, views, and indexes, developers and architects can design their data warehouses and describe various business entities without compromising to deliver on ever-demanding performance needs. +- **Cost control:** With engines, system administrators can deploy engines that fit the need while achieving desired price-performance characteristics. Engines can scale vertically up and down, and horizontally out and in to meet business needs while allowing granular cost control. +- **Workload management:** Firebolt offers full workload isolation for computations, data and metadata. 
Firebolt users can deploy separate engines to support heterogeneous workloads, while having access to the same data. Firebolt supports a variety of workloads, including data-intensive applications requiring instant data access, complex business-critical dashboards needing timely updates, and intricate Extract-Load-Transform (ELT) processes for data ingestion. + +Each account in Firebolt exists in a single AWS region, and can have engines and databases associated with it. Initially after registration, an account contains no resources, and only one user that has an account administrator role. An account can contain many users, as shown in the following diagram: + +![In Firebolt, an account can contain many users, an engine and database.](../../assets/images/manage-organization-accounts-contents.png) + +### [](#users)Users + +A user must be associated with a role, which grants them permission to access resources. These users can be associated with different roles within a single account. Each user must be associated with either a login for personal access or a service account for programmatic access, as shown in the following diagram: + +![A user must be associated with either a login or a service account.](../../assets/images/user_login_service-account.png) + +A login consists of an **email address**. This login uniquely identifies the user. + +**Example** + +In the following example account structure, `user_1` has a manager role that grants access to engines and databases associated with human resources tasks, as well as a marketing role that grants them access to everything that their employee has access to. A marketing employee, `user_2`, has read-only access to the tables in the database in `marketing_account`, but they cannot insert new entries or delete entries from a table. 
+ +![A user can have multiple roles in an account.](../../assets/images/manage-organization-marketing-accounts-example.png) + +### [](#roles)Roles + +In Firebolt, each user is associated with either a **login**, which is an email address, or a **service account**. Each user must also have a role, as shown in the following diagram: + +![A user must be associated with either a login or a service account.](../../assets/images/user_login_service-account.png) + +The role grants the user permission to access resources inside the account that they are associated with. A user can have several roles associated with them at the same time. Firebolt has built-in roles with defined permissions. You can also define a [custom role](/Overview/Security/Role-Based%20Access%20Control/role-management/custom-roles.html) that grants permissions specific to your use case. + +#### [](#firebolt-built-in-roles)Firebolt built-in roles + +Firebolt has the following built-in roles with associated permissions for objects including databases, engines, users, network policies, and accounts: + +- [Organizations and accounts](#organizations-and-accounts) + + - [Organizations](#organizations) + - [Accounts](#accounts) + + - [Users](#users) + - [Roles](#roles) + + - [Firebolt built-in roles](#firebolt-built-in-roles) + + - [Public role](#public-role) + - [System administrative role](#system-administrative-role) + - [Account administrative role](#account-administrative-role) + - [Organizational administrative role](#organizational-administrative-role) + - [Additional resources](#additional-resources) + + - [Billing](#billing) + - [Security](#security) + - [Available regions](#available-regions) + +##### [](#public-role)Public role + +A public role is associated with a user that can: + +- Use a database. +- Use a public schema. +- Create a public table. +- Create a public view. +- Create a public index. +- Create a public external table. 
+ +A public role has the **lowest** access privileges of all roles in Firebolt, as shown in the following diagram: + +![The public role has permission for schema and its underlying components.](../../assets/images/manage-organization-public-role.png) + +##### [](#system-administrative-role)System administrative role + +A system administrative role has privileges to manage databases, engines, schemas, and objects within those schemas. A system administrator can: + +- Create a database in an account. +- Create an engine in an account. +- Monitor engine use. +- Has all privileges for: + + - Any database and its properties. + - Any engine and its properties. + - Any schema. + - Any view. + - Any external table. + +The previous system administrative privileges are shown in the following diagram: + +![The system admin role has privileges in database, engine, and schema plus children.](../../assets/images/manage-organization-sys-admin-role.png) + +##### [](#account-administrative-role)Account administrative role + +An account administrative role includes all privileges associated with system administrators and can also manage accounts and users. An account administrator has: + +- All system administrator privileges. +- All privileges for an account. +- The ability to meter and monitor account use. +- The ability to cancel a query on any engine in an account. + +The previous account administrative privileges are shown in the following diagram: + +![An account admin has privileges over the account and all its children.](../../assets/images/manage-organization-account-admin-role.png) + +##### [](#organizational-administrative-role)Organizational administrative role + +An organizational administrative role has all privileges associated with system administrators and can also manage accounts and users. An organizational administrator has: + +- All privileges for an organization. +- All privileges for any account in an organization. 
+- All privileges for any login in an organization. +- All privileges for any service account in an organization. +- All privileges for any network policies in an organization. +- The ability to monitor any usage in the organization. +- The ability to set any organization-related property. + +An organizational administrative role has the **highest** access privileges of all roles in Firebolt, as shown in the following diagram: + +![An org admin has privileges over the entire organization and all of its children.](../../assets/images/manage-organization-org-admin-role.png) + +- **Global authentication method:** Firebolt handles user authentication and access control at the organization level. A login (represented by an email) is created for each user accessing Firebolt. +- **Programmatic access:** [Service accounts](/Guides/managing-your-organization/service-accounts.html) enable programmatic access to Firebolt. +- **Network policy enforcement:** [Network policies](/Guides/security/network-policies.html) provide fine-grain control of IP ranges that are allowed or blocked from accessing an organization. + +## [](#additional-resources)Additional resources + +### [](#billing)Billing + +Firebolt provides billing at the organization level, but gives you billing observability at both organization and account levels. This allows: + +- **Organization-level governance:** Monitor and analyze the overall billing for all accounts to gain insights into the organization’s cost distribution and resource utilization at the organization level. +- **Account-level observability:** Delve into detailed billing information specific to each account, allowing you to track individual accounts’ usage, costs, storage, and compute consumption patterns. + +Firebolt bills are based on the consumption of resources within each account in your organization. This includes the total amount of data stored and engine usage. Learn how to [manage billing](/Guides/managing-your-organization/billing.html). 
+ +### [](#security)Security + +Learn about authentication methods, role-based access control, network policies, and object ownership in [Configure security](/Guides/security/). + +### [](#available-regions)Available regions + +View the [AWS regions](/Reference/available-regions.html) where you can use Firebolt. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_queries.md b/cmd/docs-scrapper/fireboltdocs/overview_queries.md new file mode 100644 index 0000000..7572c43 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_queries.md @@ -0,0 +1,25 @@ +# [](#queries)Queries + +Firebolt is designed to run SQL queries efficiently, using advanced optimizations at both the query and workload levels. This section provides an overview of how Firebolt processes queries, the optimizations applied during query handling, and the tools available for monitoring and improving performance. For guidance on how to run queries and interact with data in Firebolt, see [Query data](/Guides/query-data/). + +## [](#optimizations-on-a-per-query-basis)Optimizations on a per-query basis + +Firebolt applies specific optimizations at the individual query level to ensure efficient performance, even for complex queries. These optimizations take effect immediately when you create an account and start your first engine. + +- [Spilling intermediate query state](/Overview/queries/understand-spilling.html) to the local SSD cache. This allows processing queries whose working set exceeds main memory. +- Firebolt has advanced support for correlated subqueries. These are automatically decorrelated by our query planner to maximize performance. + +## [](#workload-level-optimizations)Workload-level optimizations + +Homogeneous workloads with repeated query structures can benefit significantly from workload-level optimizations. Examples for such workloads are customer-facing, high-concurrency data apps, or internal BI workloads. 
For these workloads, Firebolt leverages multiple different optimizations. + +- [Reusing query sub-results and result caching](/Overview/queries/understand-query-performance-subresult.html) to reduce redundant calculations across queries. +- [History-based query optimization](/Overview/queries/understand-query-performance-hbs.html), which leverages past query patterns to improve query plans for new queries. + +## [](#query-telemetry-and-monitoring)Query telemetry and monitoring + +Firebolt provides the following tools to monitor and analyze query performance: + +- **`EXPLAIN` command**: View the query plan to understand how Firebolt executes your query. +- **Telemetry data**: Analyze metrics such as runtime, memory usage, and data processed. +- **Query history**: Use views like `information_schema.engine_query_history` to monitor query performance details over time. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_hbs.md b/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_hbs.md new file mode 100644 index 0000000..02a9182 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_hbs.md @@ -0,0 +1,58 @@ +# [](#understanding-history-based-optimization)Understanding history-based optimization + +Firebolt’s query plans adapt and evolve with your workload over time. Through a feedback loop, Firebolt’s query optimizer learns from previous queries and progressively generates more efficient plans, steadily improving query performance towards optimal runtimes. + +**Topics** + +- [Scope of history-based optimization](#scope-of-history-based-optimization) – Learn which processes utilize history-based optimization. +- [How it works](#how-it-works) – Learn how telemetry data and query history help optimize future query plans and performance. 
+ + - [How recording query history works](#how-recording-query-history-works) – Learn what is recorded in query history and how information is protected. + - [How planning queries based on history work](#how-planning-queries-based-on-history-work) – Learn how history-based snapshots are continually updated to improve query performance across all engines in the same database and account. +- [Observability](#observability) – Learn how to check if history-based optimization is used in your query plan. + +## [](#scope-of-history-based-optimization)Scope of history-based optimization + +Firebolt’s history-based optimization techniques include the following: + +- **Join ordering** — The sequence in which tables are joined during query processing, especially in complex queries with multiple tables, can significantly affect query performance. Firebolt uses query statistics and historical patterns to determine the most efficient join order that optimizes both run times and resource utilization. +- **Aggregating index selection** — Firebolt analyzes historical query patterns to identify the most frequently used aggregating indexes, which store pre-computed aggregates such as `SUM`, `AVG`, and `COUNT`. It then suggests indexes to reduce computational load during query processing. +- **Semi-join and anti-join planning** — Firebolt’s query planner selects the most efficient way to run semi-joins, which return matching rows, and anti-joins, which return rows with no match in another table. +- **Broadcast and shuffle strategies for multi-node engines** — Firebolt uses a broadcast strategy for small datasets that can be efficiently distributed across all nodes to minimize data movement, and a shuffle strategy when both datasets are large. Implementing these strategies can greatly improve query performance for distributed joins by optimizing how data is distributed and processed across multiple nodes. 
+ +## [](#how-it-works)How it works + +When you run a query, Firebolt’s query optimizer constructs a query plan to run the process efficiently. After running the query, the collected telemetry is fed back into Firebolt’s history snapshot. When you run the next query, the query optimizer accesses the telemetry that was collected in that history snapshot and extracts from it statistical information. Consequently, previously run queries aid the query optimizer in finding better query plans for subsequent queries and thereby improve performance. + +![Telemetry data flows back from the runtime into the query optimizer where the next run is planned.](../../assets/images/history-based-optimization.png) + +### [](#how-recording-query-history-works)How recording query history works + +Firebolt’s query optimizer improves runtime performance by automatically collecting and analyzing telemetry data from past queries, without requiring user intervention. + +Only the query plan and its related telemetry data are stored in the history snapshot. This telemetry includes metrics for each query operator, such as the number of rows and bytes processed, time elapsed, and memory usage. The query plan itself retains the original query string, meaning if the query contains personally identifiable information (PII), it will appear in the snapshot. However, the query result is not used in optimization, so PII in the result is not stored. While PII in the query string is stored, the history snapshot is securely protected. Access is restricted to engines on the same database and account, as well as to authorized Firebolt personnel only. + +### [](#how-planning-queries-based-on-history-work)How planning queries based on history work + +When you submit a query, Firebolt analyzes its recorded history to predict how different query plans will perform, and chooses one that should deliver the best performance. 
This query history is released to the query optimizer every `10` minutes in the form of a history-based statistics (HBS) snapshot. + +Each engine has a dedicated HBS component that is always active, continuously generating snapshots for the engine to reference during query planning. These snapshots are saved and shared among all engines running on the same database within the same account. Consequently, an engine can benefit from the query history from another engine, improving performance across all engines without needing to restart the analysis process. After engine restart, the engine retrieves the latest HBS snapshot. + +## [](#observability)Observability + +The Firebolt query optimizer uses HBS in the scenarios listed in the scope section above. To see whether history-based statistics were used when planning your query, use `EXPLAIN` with the `statistics` option, as follows: + +``` +EXPLAIN (statistics) +select ... -- your query goes here +``` + +The `EXPLAIN` option `statistics` annotates all nodes of the query plan with the sources of statistics used in its cost estimation. An example of the `EXPLAIN (statistics)` output below shows `source: history`, and we know that the estimates were made based on history. + +```plain +[Logical Profile]: [source: history] +``` + +For more information about output from `EXPLAIN (statistics)`, including various possible values that the source field can take, see [Explain: Example with statistics](../../sql_reference/commands/queries/explain.html#example-with-statistics). + +The `EXPLAIN(statistics)` output will not show `source:history` during the first run of an alternate query plan if the optimizer finds that the initial query plan was not optimal. This absence occurs because the alternative plan has not yet gathered telemetry data during its initial run. In the next execution of the same query, the optimizer understands that it should choose an alternative plan to avoid the slow execution. 
This means that history-based statistics are in effect, but we may not see `history` shown in sources of the `EXPLAIN (statistics)` output for the new run. It could be that for the alternative plan we have not collected its execution telemetry yet. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_subresult.md b/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_subresult.md new file mode 100644 index 0000000..0a116ad --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_query_performance_subresult.md @@ -0,0 +1,81 @@ +# [](#subresult-reuse-and-result-caching)Subresult reuse and result caching + +*Learn about subresult reuse in Firebolt in more detail in our blog post [Caching & Reuse of Subresults across Queries](https://www.firebolt.io/blog/caching-reuse-of-subresults-across-queries).* + +Workloads using repetitive query patterns can benefit tremendously from reuse and caching. Firebolt can cache subresults from various operators within the query plan, including hash tables from hash-joins. + +**Topics** + +- [How Firebolt caches subresults](#how-firebolt-caches-subresults) – An overview of how and when Firebolt caches subresults. +- [Example](#example) – How Firebolt’s subresult caching and hash table reuse improve query performance for repetitive and partially similar queries. +- [Recognizing subresult reuse in query telemetry](#recognizing-subresult-reuse-in-query-telemetry) – How to view results from `EXPLAIN (ANALYZE)` to check for subresult reuse. +- [Limitations](#limitations) – Understand the situations where subresults cannot be cached or used. +- [Disabling subresult reuse](#disabling-subresult-reuse) – How to turn off subresult caching for benchmarking or other tests. 
+ +## [](#how-firebolt-caches-subresults)How Firebolt caches subresults + +Subresults are placed in an in-memory FireCache, which can use up to 20% of the available RAM. If a sub-plan is reused in a different query, Firebolt’s caching system detects it and can retrieve cached subresults, even if the rest of the query differs. Firebolt uses the following guidelines to determine which subresults to cache: + +- Firebolt’s optimizer may insert a `MaybeCache` operator above any node in the query plan, which may cache a subresult if it isn’t too large. The `MaybeCache` operator may later retrieve and reuse the cached subresult if the same subplan, with the same underlying data, is evaluated again. Currently, the optimizer places a `MaybeCache` operator in the following places: + + - At the top of the query plan to cache the full result. + - At nodes where “sideways information passing” occurs, optimizing joins where the probe-side has an indexed key. + + The `MaybeCache` operator is versatile, and it can be placed anywhere in the plan. +- Firebolt stores subresult hash-tables created for `Join` operators in the FireCache, provided they are not too large. These hash tables are costly to compute, so reusing them when similar consecutive queries run offers significant performance advantages. + +Subresult reuse in Firebolt is fully transactional. When any changes occur in a base table (such as through an `INSERT`, `UPDATE`, or `DELETE`), outdated cache entries are no longer used. 
+ +### [](#example)Example + +The following query, based on the [TPC-H benchmark](https://www.tpc.org/tpch/) schema, calculates the total order price and the number of orders for each nation by joining the `orders`, `customer`, and `nation` tables: + +```SQL +SELECT n_name as nation, SUM(o_totalprice), COUNT(*) + FROM orders, customer, nation + WHERE o_custkey = c_custkey AND c_nationkey = n_nationkey + GROUP BY ALL; +``` + +The simplified plan for this example query would look like the following: + +![A query plan using subresult reuse.](../../assets/images/subresult_reuse.png) + +In this example, a `MaybeCache` operator positioned at the top of the plan caches the subresult from the first run into the FireCache. Additionally, both `Join` operators store their respective hash tables in the cache. On a subsequent run of exactly the same query (over unchanged data), the `MaybeCache` operator fetches the subresult from the cache, allowing the entire evaluation to be skipped. As a result, query latency is reduced to mere milliseconds. In this example, it leads to a speed improvement of over 100x on a single node, medium engine running TPC-H with scale factor of 100. + +If the `WHERE` condition is changed to add `... AND o_orderdate >= '1998-01-01'::Date ...`, the subresult cached by the `MaybeCache` operator cannot be used because the query plan below it has changed. However, the subplan below the upper `Join` remains unchanged, allowing the previously cached hash table to be reused in that `JOIN` operator. This eliminates the need to re-evaluate the subplan and rebuild the hash table. This results in more than 5x speed improvement on subsequent queries, even when each query has a different date restriction. + +### [](#recognizing-subresult-reuse-in-query-telemetry)Recognizing subresult reuse in query telemetry + +Firebolt transparently leverages subresult reuse. 
If you want to see whether subresult reuse helped to speed up your query, look for `Nothing was executed` in the [EXPLAIN (ANALYZE)](/sql_reference/commands/queries/explain.html) output. This shows that an operator was skipped because a higher level operator retrieved the subresult from the FireCache. For example, in the following `EXPLAIN (ANALYZE)` output, the `MaybeCache` operator retrieved the result from the cache, bypassing the need to run the entire query: + +``` +[0] [MaybeCache] +| [RowType]: date not null, text not null, bigint not null, double precision null +| [Execution Metrics]: output cardinality = 10000, thread time = 0ms, cpu time = 0ms + \_[1] [Projection] ref_2, ref_1, ref_0, ref_3 + | [RowType]: date not null, text not null, bigint not null, double precision null + | [Execution Metrics]: Optimized out + \_[2] [SortMerge] OrderBy: [ref_2 Ascending Last, ref_1 Ascending Last, ref_0 Ascending Last, ref_3 Ascending Last] Limit: [10000] + | [RowType]: bigint not null, text not null, date not null, double precision null + | [Execution Metrics]: Nothing was executed +[...] +``` + +### [](#limitations)Limitations + +Firebolt supports subresult caching for as many queries as possible. The following are specific limitations where subresult caching cannot be applied: + +- **Result cache size** – The result cache is limited 1 MB per result to ensure that large results do not evict smaller cached subresults needed for other queries. +- **Nondeterministic functions** – Queries that use nondeterministic functions such as `RANDOM` cannot cache subresults. If an operator in the query plan depends directly or indirectly on the output of a nondeterministic function, caching is disabled. +- **External table scans** – Results from external table scans cannot be cached. These tables rely on external data sources, which may change independently of Firebolt’s caching mechanism. 
+- **Non-equality joins** – Cross joins and joins that use a join condition that’s not an equality (for example, joining on `left_side.column1 < right_side.column2`) cannot use subresult caching directly. The result cache can still be used for queries using such joins. + +### [](#disabling-subresult-reuse)Disabling subresult reuse + +Firebolt exposes [system settings](/Reference/system-settings.html) that allow turning off subresult caching on a per-query basis: + +- Setting `enable_result_cache` to `FALSE` ensures that full query results aren’t retrieved from cache, while still allowing for semantic cross-query subresult reuse. +- Setting `enable_subresult_cache` to `FALSE` disables Firebolt’s entire subresult caching layer. + +For most benchmarking scenarios, disable the result cache. This approach affects only the final result caching while preserving the benefits of cross-query subresult optimizations. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_spilling.md b/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_spilling.md new file mode 100644 index 0000000..1da30b6 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_queries_understand_spilling.md @@ -0,0 +1,47 @@ +# [](#understand-spilling)Understand spilling + +Most query execution operators in Firebolt can react dynamically to memory pressure and take measures to prevent query failure. While on a technical level there are two different mechanisms at play, they share the same goal: allowing an engine to successfully finish the vast majority of queries that would otherwise fail with an “Out of memory” error. Both mechanisms (called “spilling” and “flushing”) are applied automatically when necessary without any user intervention. + +Firebolt implements spilling for aggregations and joins, and we will explore it in more detail in the sections below. 
+ +## [](#how-spilling-works)How spilling works + +The fundamental idea of spilling is to write intermediate state to disk in a partitioned way when the data set is too big to process entirely in memory, and then process the partitions in memory one by one. The following sketch illustrates this: + +![If a working set exceeds memory, it can be processed if split into four partitions.](../../assets/images/spilling.png) + +Spilling-enabled operators continuously monitor how much memory remains available for use on each node of the engine. When they detect that the available amount is likely to be insufficient to complete the query, they temporarily stop accepting new data and start spilling part or all of the state that they have built up to the local SSD cache. This frees up memory to continue processing input data. If the available memory once again falls below the amount required to finish the operation, the operator spills additional parts of its state. Once the operator has read all of its input data, it needs to read the spilled data back from the SSD cache to produce the correct output. Because we already know that we do not have enough memory to process everything in memory, we read the spilled data *partition by partition*. These partitions are designed to be processed individually, without needing to refer to any other partitions to produce the operator’s result for a given partition. By using 256 partitions, we can process up to 200 times more data with spilling than in memory. + +Note that spilling only kicks in when absolutely necessary. This makes sure that when the working set of a query fits fully into main memory, it is executed as efficiently as possible. + +## [](#implications-of-spilling)Implications of spilling + +When an operator spills, it has to write a significant amount of data to the SSD cache and later read it back. This comes at a performance cost. 
If the engine has only slightly too little memory, using a bigger engine – whether by scaling up or by scaling out – can avoid the need for spilling and result in significantly reduced latency. + +## [](#observability)Observability + +If a query made use of spilling, you will see a non-zero value in the `spilled_bytes` column of [information\_schema.engine\_query\_history](/sql_reference/information-schema/engine-query-history.html) describing how many bytes were spilled to the SSD cache in total by all spilling operators in the query. Additionally, you can monitor the amount of spilled data on the SSD cache at any particular time in the `spilled_bytes` column of [information\_schema.engine\_metrics\_history](/sql_reference/information-schema/engine-metrics-history.html) or in the “engine monitoring” section of the Firebolt Web Interface. + +## [](#example)Example + +You can easily test spilling for yourself with the following query on an engine with a single node of type “small”: + +``` +SELECT min(x), max(x), x FROM generate_series(1, 1000000000) r(x) GROUP BY x; +``` + +This aggregation uses one billion groups, and the aggregate state is too large to process in memory on a single small node. As a result, the engine spills some data to disk (around 8 GB) before merging it again. You can observe how the `spilled_bytes` metric in `information_schema.engine_metrics_history` increases while the aggregation processes and spills input, and then starts decreasing again as spilled data is merged and deleted. + +## [](#limitations)Limitations + +Not all queries can benefit from spilling. Spilling is not implemented for the `ORDER BY` operator and for [window functions](/sql_reference/functions-reference/window/). Additionally, aggregations where the size of a single key’s state exceeds the available memory, such as a `count(distinct)` without a `GROUP BY` clause on a column with a huge amount of distinct values, cannot currently spill. 
Such queries could still fail with an out-of-memory error if spilling in other queries or operators cannot free up enough memory for these non-spilling-enabled operators. + +Spilling is also limited by the available SSD cache. Once the cache is full and no non-essential data remains to be evicted from cache, spilling cannot proceed, and the query will fail. Of course, any leftover spilled files will be cleaned up in such cases, ensuring that the SSD cache capacity is made available again for caching or spilled data from other queries. + +## [](#technical-details-spilling-vs-flushing)Technical details: spilling vs. flushing + +Many operators, such as filters or projections (function evaluations), do not keep significant amounts of data in memory at any given time. + +Some operators have an over-pressure relief valve and can *flush* excess data. This applies to both insertion operators, which can reduce the amount of tablet merging applied during insert and thereby reduce their memory requirements, as well as partial aggregations in a distributed aggregation plan, which can flush their partially aggregated data to the merge stage early to continue the aggregation there. Because distributed aggregations are partitioned, this distributes the excess load over the cluster, and additionally reduces total memory usage because the merge stage eliminates any duplication found in the different nodes’ inputs. + +Other operators do not have this ability, and must *spill* data to the SSD cache to avoid going out of memory and later process the data piece by piece to produce the result. This affects all operators that need to fully process the input data before they can start producing output. Firebolt implements this for the joins as well as the merge stage of aggregations. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control.md new file mode 100644 index 0000000..940964c --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control.md @@ -0,0 +1,60 @@ +# [](#role-based-access-control-rbac)Role-Based Access Control (RBAC) + +Firebolt uses Role-Based Access Control (RBAC) to manage permissions and ensure that users and roles have only the necessary access to perform operations within the system. RBAC follows the [principle of least privilege](https://en.wikipedia.org/wiki/Principle_of_least_privilege), where access is restricted to the minimum required for tasks. + +Permissions in Firebolt are managed through roles, which control access to databases, schemas, tables, engines, and other objects. Permissions propagate from higher-level objects to their related objects, simplifying access management. + +With RBAC you can: + +- Assign roles to users and other roles to streamline permissions. +- Grant or revoke access at **global**, **regional**, or **object-specific** levels. +- Control operations across your Firebolt environment, such as managing data, creating resources, or executing queries. + +## [](#firebolts-hierarchical-object-model-and-rbac-structure)Firebolt’s hierarchical object model and RBAC structure + +Firebolt uses an object model to organize resources in a way that complements how organizations manage their data warehouses. This model enforces a one-to-many structure where: + +- An object can encompass multiple related objects beneath it. +- Each related object is associated with a single higher-level object and cannot be shared across multiple higher-level objects. +- Permissions flow from higher-level objects to related objects. 
For example, granting a role usage on a database also provides access to all schemas and tables within that database. + +Objects in the Firebolt object model are securable and come with a set of permissions, enabling administrators to control which identities have access to them when accessing their Firebolt cluster. + +For more information about the organizational and account structure in Firebolt’s object model, see [Organization and accounts](/Overview/organizations-accounts.html). + +## [](#key-object-types)Key object types + +Firebolt divides objects into **global** and **regional** types, depending on their scope and management level. + +- **Global objects**: Managed globally at the [organization](/Overview/organizations-accounts.html#organizations) level, they can contain objects that are deployed and grouped regionally, including the following: + + - Network Policies + - Logins + - Service Accounts + - Accounts +- **Regional objects**: Tied to specific regions grouped under an [account](/Overview/organizations-accounts.html#accounts), they can include the following: + + - Users + - Roles + - Databases + + - Schemas + + - External Tables + - Managed Tables + - Views + - Indexes + - Engines + +Firebolt provides the [organization\_admin](/Overview/organizations-accounts.html#organizational-administrative-role) role to manage organizational resources. While granular RBAC is currently only available at the account level, Firebolt plans to make RBAC available at the organizational level in a future release. 
+ +For more information about Firebolt’s RBAC model & how to administer your Firebolt cluster, access the sections below: + +* * * + +- [Database permissions](/Overview/Security/Role-Based%20Access%20Control/database-permissions/) +- [Account Permissions](/Overview/Security/Role-Based%20Access%20Control/account-permissions.html) +- [Engine Permissions](/Overview/Security/Role-Based%20Access%20Control/engine-permissions.html) +- [Ownership](/Overview/Security/Role-Based%20Access%20Control/ownership.html) +- [Check Assigned Privileges](/Overview/Security/Role-Based%20Access%20Control/check-privileges.html) +- [Role Management](/Overview/Security/Role-Based%20Access%20Control/role-management/) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_account_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_account_permissions.md new file mode 100644 index 0000000..b2d4194 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_account_permissions.md @@ -0,0 +1,105 @@ +# [](#account-permissions)Account permissions + +Accounts represent the physical instance of your data warehouse in Firebolt and are created in Firebolt-supported regions. All objects within an account—such as databases, engines, roles, and users—are scoped to that specific account. + +Privilege Description GRANT Syntax REVOKE Syntax CREATE DATABASE Allows creating new databases in an account. `GRANT CREATE DATABASE ON ACCOUNT TO ;` `REVOKE CREATE DATABASE ON ACCOUNT FROM ;` USAGE ANY DATABASE Allows using all current and future databases in an account. `GRANT USAGE ANY DATABASE ON ACCOUNT TO ;` `REVOKE USAGE ANY DATABASE ON ACCOUNT FROM ;` MODIFY ANY DATABASE Allows editing all current and future databases in an account. 
`GRANT MODIFY ANY DATABASE ON ACCOUNT TO ;` `REVOKE MODIFY ANY DATABASE ON ACCOUNT FROM ;` [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html) Allows creating new engines in an account. `GRANT CREATE ENGINE ON ACCOUNT TO ;` `REVOKE CREATE ENGINE ON ACCOUNT FROM ;` USAGE ANY ENGINE Allows using all current and future engines in an account. `GRANT USAGE ANY ENGINE ON ACCOUNT TO ;` `REVOKE USAGE ANY ENGINE ON ACCOUNT FROM ;` OPERATE ANY ENGINE Allows starting and stopping all current and future engines in the account. `GRANT OPERATE ANY ENGINE ON ACCOUNT TO ;` `REVOKE OPERATE ANY ENGINE ON ACCOUNT FROM ;` MODIFY ANY ENGINE Allows editing all current and future engines in the account. `GRANT MODIFY ANY ENGINE ON ACCOUNT TO ;` `REVOKE MODIFY ANY ENGINE ON ACCOUNT FROM ;` [CREATE ROLE](/sql_reference/commands/access-control/create-role.html) Allows creating new roles in the account. `GRANT CREATE ROLE ON ACCOUNT TO ;` `REVOKE CREATE ROLE ON ACCOUNT FROM ;` MODIFY ANY ROLE Allows editing all current and future roles in the account. `GRANT MODIFY ANY ROLE ON ACCOUNT TO ;` `REVOKE MODIFY ANY ROLE ON ACCOUNT FROM ;` [CREATE USER](/sql_reference/commands/access-control/create-user.html) Allows creating new users in the account. `GRANT CREATE USER ON ACCOUNT TO ;` `REVOKE CREATE USER ON ACCOUNT FROM ;` MODIFY ANY USER Allows editing all current and future users in the account. `GRANT MODIFY ANY USER ON ACCOUNT TO ;` `REVOKE MODIFY ANY USER ON ACCOUNT FROM ;` MONITOR \[ANY USAGE] Enables the tracking of engine queries through the `engine_running_queries` view for active queries and the `engine_query_history` view for past queries in `information_schema`. `GRANT MONITOR ANY USAGE ON ACCOUNT TO ;` `REVOKE MONITOR ANY USAGE ON ACCOUNT FROM ;` ALL \[PRIVILEGES] Grants all direct privileges for a specified account to a specified role. 
`GRANT ALL ON ACCOUNT TO ;` `REVOKE ALL ON ACCOUNT FROM ;` + +Revoking a privilege removes it from a role but does not explicitly deny the privilege. If the privilege was not previously granted, revoking it has no effect. + +## [](#examples-of-granting-account-level-permissions)Examples of granting account-level permissions + +### [](#create-database-permission)CREATE DATABASE permission + +The following code example [grants](/sql_reference/commands/access-control/grant.html) the role `developer_role` permission to create new databases within the `account_name`: + +``` +GRANT CREATE DATABASE ON ACCOUNT account_name TO developer_role; +``` + +### [](#usage-any-database-permission)USAGE ANY DATABASE permission + +The following code example gives permission to the role `developer_role` to access all current and future databases within the `account_name`: + +``` +GRANT USAGE ANY DATABASE ON ACCOUNT account_name TO developer_role; +``` + +### [](#modify-any-database-permission)MODIFY ANY DATABASE permission + +The following code example grants the role `developer_role` permission to modify or delete all current and future databases within the `account_name`: + +``` +GRANT MODIFY ANY DATABASE ON ACCOUNT account_name TO developer_role; +``` + +### [](#create-engine-permission)[CREATE ENGINE](/sql_reference/commands/engines/create-engine.html) permission + +The following code example gives the role `developer_role` permission to create new engines within the `account_name`: + +``` +GRANT CREATE ENGINE ON ACCOUNT account_name TO developer_role; +``` + +### [](#usage-any-engine-permission)USAGE ANY ENGINE permission + +The following code example grants the role `developer_role` permission to use all current and future engines within the `account_name`: + +``` +GRANT USAGE ANY ENGINE ON ACCOUNT account_name TO developer_role; +``` + +### [](#operate-any-engine-permission)OPERATE ANY ENGINE permission + +The following code example gives the role `developer_role` permission to 
start and stop all current and future engines within the `account_name`: + +``` +GRANT OPERATE ANY ENGINE ON ACCOUNT account_name TO developer_role; +``` + +### [](#modify-any-engine-permission)MODIFY ANY ENGINE permission + +The following code example grants the role `developer_role` permission to modify or delete all current and future engines within the `account_name`: + +``` +GRANT MODIFY ANY ENGINE ON ACCOUNT account_name TO developer_role; +``` + +### [](#create-role-permission)[CREATE ROLE](/sql_reference/commands/access-control/create-role.html) permission + +The following code example gives the role `developer_role` permission to create new roles within the `account_name`: + +``` +GRANT CREATE ROLE ON ACCOUNT account_name TO developer_role; +``` + +### [](#modify-any-role-permission)MODIFY ANY ROLE permission + +The following code example grants the role `developer_role` permission to modify or delete all current and future roles within the `account_name`: + +``` +GRANT MODIFY ANY ROLE ON ACCOUNT account_name TO developer_role; +``` + +### [](#create-user-permission)[CREATE USER](/sql_reference/commands/access-control/create-user.html) permission + +The following code example gives the role `developer_role` permission to create new users within the `account_name`: + +``` +GRANT CREATE USER ON ACCOUNT account_name TO developer_role; +``` + +### [](#modify-any-user-permission)MODIFY ANY USER permission + +The following code example grants the role `developer_role` permission to modify or delete all current and future users within the `account_name`: + +``` +GRANT MODIFY ANY USER ON ACCOUNT account_name TO developer_role; +``` + +### [](#monitor-any-usage-permission)MONITOR \[ANY USAGE] permission + +The following code example grants the role `developer_role` permission to see the query history and currently running queries on all the engines within `account_name`: + +``` +GRANT MONITOR ANY USAGE ON ACCOUNT "account-1" TO developer_role; +``` \ No newline at 
end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_check_privileges.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_check_privileges.md new file mode 100644 index 0000000..5ca45c2 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_check_privileges.md @@ -0,0 +1,81 @@ +# [](#check-assigned-privileges)Check assigned privileges + +The Firebolt information schema provides system views that allow you to view metadata and permissions for objects within your current account or database. This page explains how to query and validate user and role privileges at both the **account** and **database** levels. + +## [](#viewing-effective-privileges)Viewing effective privileges + +The following code example shows how to view the effective privileges of the current user: + +``` +SELECT + AR.grantee, + AR.role_name, + OP.privilege_type, + OP.object_type, + OP.object_name +FROM information_schema.transitive_applicable_roles AS AR +JOIN information_schema.object_privileges AS OP +ON (AR.role_name = OP.grantee) +WHERE + AR.grantee = session_user(); +``` + +## [](#usage-examples)Usage examples + +The following examples demonstrate how to validate privileges at both the **account** level and the **database** level. By running a query against the `information_schema` views, you can check the effective permissions granted to a user or role. Each scenario includes an example query and output to illustrate the scope of the retrieved privileges. + +### [](#validating-privileges-at-the-account-level)Validating privileges at the account level + +If no database is selected, the query runs at the account level and shows account-scoped privileges. 
+ +**Example** + +In the following code example, a user `test_user` with an [account\_admin](/Overview/organizations-accounts.html#account-administrative-role) role retrieves their privileges and associated roles for their current user session: + +``` +SELECT + AR.grantee, + AR.role_name, + OP.privilege_type, + OP.object_type, + OP.object_name +FROM information_schema.transitive_applicable_roles AS AR +JOIN information_schema.object_privileges AS OP +ON (AR.role_name = OP.grantee) +WHERE + AR.grantee = session_user(); +``` + +**Returns** + +Grantee role\_name privilege\_type object\_type object\_name test\_user account\_admin `MODIFY ANY ENGINE` account account-1 test\_user account\_admin `MODIFY ANY DATABASE` account account-1 test\_user account\_admin `OPERATE ANY ENGINE` account account-1 test\_user account\_admin `MODIFY ANY ROLE` account account-1 test\_user account\_admin `USAGE ANY DATABASE` account account-1 test\_user account\_admin `MONITOR ANY USAGE` account account-1 test\_user account\_admin `MANAGE GRANTS` account account-1 test\_user account\_admin `USAGE ANY ENGINE` account account-1 test\_user account\_admin `MODIFY ANY USER` account account-1 test\_user account\_admin `METER USAGE` account account-1 test\_user account\_admin `CREATE SCHEMA` database UltraFast test\_user account\_admin `CREATE USER` account account-1 test\_user account\_admin `CREATE DATABASE` account account-1 test\_user account\_admin `CREATE ROLE` account account-1 test\_user account\_admin `CREATE ENGINE` account account-1 + +The previous table confirms that `test_user` has account-level privileges, such as permission to create engines, roles, and databases, as well as permission to modify users and engines. + +### [](#validating-privileges-at-the-database-level)Validating privileges at the database level + +When a specific database is selected, the query retrieves privileges scoped to that database. 
+ +**Example** + +The following code example retrieves the applicable roles and associated privileges, object types, and object names for `test_user`, who holds an `account_admin` role from the information schema. + +``` +SELECT + AR.grantee, + AR.role_name, + OP.privilege_type, + OP.object_type, + OP.object_name +FROM information_schema.transitive_applicable_roles AS AR +JOIN information_schema.object_privileges AS OP +ON (AR.role_name = OP.grantee) +WHERE + AR.grantee = session_user(); +``` + +**Returns** + +Grantee role\_name privilege\_type object\_type object\_name test\_user account\_admin `SELECT ANY` schema public test\_user account\_admin `DELETE ANY` schema public test\_user account\_admin `VACUUM ANY` schema public test\_user account\_admin `INSERT ANY` schema public test\_user account\_admin `MODIFY` schema public test\_user account\_admin `CREATE` schema public test\_user account\_admin `USAGE` schema public test\_user account\_admin `MODIFY ANY` schema public test\_user account\_admin `TRUNCATE ANY` schema public + +The previous output confirms that `test_user` has database-level privileges, such as `SELECT`, `INSERT`, `DELETE`, and schema-level `MODIFY` permissions. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions.md new file mode 100644 index 0000000..aba21c9 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions.md @@ -0,0 +1,59 @@ +# [](#database-permissions)Database permissions + +In Firebolt, a **database** is a logical container that organizes your data warehouse by holding components such as **tables**, **views**, **indexes**, and other database objects, as shown in the following diagram: + +![Firebolt's object model contains schema under databases, and tables, views, and indexes under schema.](../../../../assets/images/database-hierarchy.png) + +Database-level permissions define what actions roles can perform within a database and its associated objects. + +## [](#database-level-privileges)Database-level privileges + +Privilege Description GRANT Syntax REVOKE Syntax USAGE Allows access to the database and enables attaching engines to it. `GRANT USAGE ON DATABASE TO ;` `REVOKE USAGE ON DATABASE FROM ;` MODIFY Allows altering database properties and dropping the database. `GRANT MODIFY ON DATABASE TO ;` `REVOKE MODIFY ON DATABASE FROM ;` USAGE ANY SCHEMA Allows access to all current and future schemas within the database. `GRANT USAGE ANY SCHEMA ON DATABASE TO ;` `REVOKE USAGE ANY SCHEMA ON DATABASE FROM ;` [VACUUM](/sql_reference/commands/data-management/vacuum.html) ANY Allows running the `VACUUM` operation on all current and future tables. `GRANT VACUUM ANY ON DATABASE TO ;` `REVOKE VACUUM ANY ON DATABASE FROM ;` ALL \[PRIVILEGES] Grants all direct privileges over the database to a role. 
`GRANT ALL ON DATABASE TO ;` `REVOKE ALL ON DATABASE FROM ;` + +## [](#examples-of-granting-database-permissions)Examples of granting database permissions + +### [](#usage-permission)USAGE permission + +The following code example [grants](/sql_reference/commands/access-control/grant.html) the role `developer_role` access to use the specified database: + +``` +GRANT USAGE ON DATABASE "database-1" TO developer_role; +``` + +### [](#modify-permission)MODIFY permission + +The following code example gives the role `developer_role` permission to alter properties or drop the specified database: + +``` +GRANT MODIFY ON DATABASE "database-1" TO developer_role; +``` + +### [](#usage-any-schema-permission)USAGE ANY SCHEMA permission + +The following code example grants the role `developer_role` access to all current and future schemas within the specified database: + +``` +GRANT USAGE ANY SCHEMA ON DATABASE "database-1" TO developer_role; +``` + +### [](#vacuum-any-permission)VACUUM ANY permission + +The following code example gives the role `developer_role` permission to run [VACUUM](/sql_reference/commands/data-management/vacuum.html) operations on all current and future tables in the specified database: + +``` +GRANT VACUUM ANY ON DATABASE "database-1" TO developer_role; +``` + +### [](#all-permissions)ALL permissions + +The following code example gives the role `developer_role` all the direct permissions over database `database-1`: + +``` +GRANT ALL ON DATABASE "database-1" TO developer_role; +``` + +* * * + +- [Schema permissions](/Overview/Security/Role-Based%20Access%20Control/database-permissions/schema-permissions.html) +- [Table permissions](/Overview/Security/Role-Based%20Access%20Control/database-permissions/table-permissions.html) +- [View permissions](/Overview/Security/Role-Based%20Access%20Control/database-permissions/view-permissions.html) \ No newline at end of file diff --git 
a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_schema_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_schema_permissions.md new file mode 100644 index 0000000..d9c1b99 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_schema_permissions.md @@ -0,0 +1,99 @@ +# [](#schema-permissions)Schema permissions + +In Firebolt, a **schema** is a logical namespace within a database that organizes **tables**, **views**, and other objects. Schema-level permissions allow roles to perform specific actions, such as accessing, modifying, or managing objects within a schema. + +To perform actions on a schema or its objects, the role must also have the **USAGE** privilege on the schema’s parent database. + +## [](#schema-level-privileges)Schema-level privileges + +Privilege Description GRANT Syntax REVOKE Syntax USAGE Allows access to the schema and its objects. `GRANT USAGE ON SCHEMA public IN TO ;` `REVOKE USAGE ON SCHEMA public IN FROM ;` MODIFY Allows altering the schema properties, including renaming or dropping the schema. `GRANT MODIFY ON SCHEMA public IN TO ;` `REVOKE MODIFY ON SCHEMA public IN FROM ;` CREATE Allows creating new objects, such as tables and views, within the schema. `GRANT CREATE ON SCHEMA public IN TO ;` `REVOKE CREATE ON SCHEMA public IN FROM ;` [DELETE](/sql_reference/commands/data-management/delete.html) ANY Allows deleting rows and partitions from all current and future tables. `GRANT DELETE ANY ON SCHEMA public IN TO ;` `REVOKE DELETE ANY ON SCHEMA public IN FROM ;` [INSERT](/sql_reference/commands/data-management/insert.html) ANY Allows inserting rows into all current and future tables within the schema. 
`GRANT INSERT ANY ON SCHEMA public IN TO ;` `REVOKE INSERT ANY ON SCHEMA public IN FROM ;` [UPDATE](/sql_reference/commands/data-management/update.html) ANY Allows updating rows in all current and future tables within the schema. `GRANT UPDATE ANY ON SCHEMA public IN TO ;` `REVOKE UPDATE ANY ON SCHEMA public IN FROM ;` [TRUNCATE](/sql_reference/commands/data-management/truncate-table.html) ANY Allows truncating all current and future tables within the schema. `GRANT TRUNCATE ANY ON SCHEMA public IN TO ;` `REVOKE TRUNCATE ANY ON SCHEMA public IN FROM ;` [VACUUM](/sql_reference/commands/data-management/vacuum.html) ANY Allows running the `VACUUM` operation on all current and future tables. `GRANT VACUUM ANY ON SCHEMA public IN TO ;` `REVOKE VACUUM ANY ON SCHEMA public IN FROM ;` MODIFY ANY Allows modifying or dropping all current and future objects in the schema. `GRANT MODIFY ANY ON SCHEMA public IN TO ;` `REVOKE MODIFY ANY ON SCHEMA public IN FROM ;` SELECT ANY Allows reading data from all current and future objects within the schema. `GRANT SELECT ANY ON SCHEMA public IN TO ;` `REVOKE SELECT ANY ON SCHEMA public IN FROM ;` ALL \[PRIVILEGES] Grants all direct privileges over the schema to a role. `GRANT ALL ON SCHEMA public IN TO ;` `REVOKE ALL ON SCHEMA public IN FROM ;` + +## [](#examples-of-granting-schema-permissions)Examples of granting schema permissions + +### [](#usage-permission)USAGE permission + +The following code example [grants](/sql_reference/commands/access-control/grant.html) the role `developer_role` permission to use the specified schema. + +``` +GRANT USAGE ON SCHEMA "public" TO developer_role; +``` + +### [](#modify-permission)MODIFY permission + +The following code example gives the role `developer_role` permission to alter properties or drop the specified schema. 
+ +``` +GRANT MODIFY ON SCHEMA "public" TO developer_role; +``` + +### [](#create-permission)CREATE permission + +The following code example grants the role `developer_role` the ability to create new objects in the specified schema: + +``` +GRANT CREATE ON SCHEMA "public" TO developer_role; +``` + +### [](#delete-any-permission)DELETE ANY permission + +The following code example gives the role `developer_role` permission to [delete](/sql_reference/commands/data-management/delete.html) rows and partitions from all current and future tables in the specified schema: + +``` +GRANT DELETE ANY ON SCHEMA "public" TO developer_role; +``` + +### [](#insert-any-permission)INSERT ANY permission + +The following code example grants the role `developer_role` permission to [insert](/sql_reference/commands/data-management/insert.html) rows into all current and future tables in the specified schema: + +``` +GRANT INSERT ANY ON SCHEMA "public" TO developer_role; +``` + +### [](#update-any-permission)UPDATE ANY permission + +The following code example gives the role `developer_role` permission to [update](/sql_reference/commands/data-management/update.html) rows in all current and future tables in the specified schema: + +``` +GRANT UPDATE ANY ON SCHEMA "public" TO developer_role; +``` + +### [](#truncate-any-permission)TRUNCATE ANY permission + +The following code example grants the role `developer_role` the ability to [truncate](/sql_reference/commands/data-management/truncate-table.html) all current and future tables in the specified schema: + +``` +GRANT TRUNCATE ANY ON SCHEMA "public" TO developer_role; +``` + +### [](#vacuum-any-permission)VACUUM ANY permission + +The following code example gives the role `developer_role` permission to run [`VACUUM`](/sql_reference/commands/data-management/vacuum.html) operations on all current and future tables in the specified schema: + +``` +GRANT VACUUM ANY ON SCHEMA "public" TO developer_role; +``` + +### [](#modify-any-permission)MODIFY 
ANY permission + +The following code example grants the role `developer_role` permission to modify or drop all current and future objects in the specified schema: + +``` +GRANT MODIFY ANY ON SCHEMA "public" TO developer_role; +``` + +### [](#select-any-permission)SELECT ANY permission + +The following code example gives the role `developer_role` permission to select data from all current and future objects in the specified schema: + +``` +GRANT SELECT ANY ON SCHEMA "public" TO developer_role; +``` + +### [](#all-permissions)ALL permissions + +The following code example gives the role `developer_role` all the direct permissions over schema `public`: + +``` +GRANT ALL ON SCHEMA "public" TO developer_role; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_table_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_table_permissions.md new file mode 100644 index 0000000..d1ac525 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_table_permissions.md @@ -0,0 +1,101 @@ +# [](#table-permissions)Table permissions + +In Firebolt, a **table** is a structured data object within a database, composed of rows and columns. Tables are the foundational units for organizing, querying, and managing data in your Firebolt data warehouse. Table-level permissions allow roles to perform actions such as selecting, modifying, or managing data within specific tables. + +To perform actions on a table, roles must also have **USAGE** permissions on both the parent schema and the parent database of the table. + +## [](#table-level-privileges)Table-level privileges + +Privilege Description GRANT Syntax REVOKE Syntax SELECT Allows selecting rows from the table. 
`GRANT SELECT ON TABLE TO ;` `REVOKE SELECT ON TABLE FROM ;` [INSERT](/sql_reference/commands/data-management/insert.html) Allows inserting rows into the table. Applies to managed tables only. `GRANT INSERT ON TABLE TO ;` `REVOKE INSERT ON TABLE FROM ;` MODIFY Allows modifying and dropping the table. `GRANT MODIFY ON TABLE TO ;` `REVOKE MODIFY ON TABLE FROM ;` [DELETE](/sql_reference/commands/data-management/delete.html) Allows deleting rows and dropping partitions from the table. Applies to managed tables only. `GRANT DELETE ON TABLE TO ;` `REVOKE DELETE ON TABLE FROM ;` [UPDATE](/sql_reference/commands/data-management/update.html) Allows updating rows in the table. Applies to managed tables only. `GRANT UPDATE ON TABLE TO ;` `REVOKE UPDATE ON TABLE FROM ;` [TRUNCATE](/sql_reference/commands/data-management/truncate-table.html) Allows truncating a table. Applies to managed tables only. `GRANT TRUNCATE ON TABLE TO ;` `REVOKE TRUNCATE ON TABLE FROM ;` [VACUUM](/sql_reference/commands/data-management/vacuum.html) Allows running the `VACUUM` operation. Applies to managed tables only. `GRANT VACUUM ON TABLE TO ;` `REVOKE VACUUM ON TABLE FROM ;` ALL \[PRIVILEGES] Grants all privileges over the table to a role. `GRANT ALL ON TABLE TO ;` `REVOKE ALL ON TABLE FROM ;` + +To grant permissions across all tables in a schema, use [schema-level privileges](/Overview/Security/Role-Based%20Access%20Control/database-permissions/schema-permissions.html). For example, privileges like **SELECT ANY**, **INSERT ANY**, or **DELETE ANY** at the schema level will apply to all current and future tables within that schema. + +## [](#aggregating-indexes)Aggregating Indexes + +An [aggregating index](/Overview/indexes/aggregating-index.html) in Firebolt accelerates queries involving aggregate functions on large tables. This reduces compute usage and improves query performance. 
+ +To **create** an aggregating index, a role must have the following permissions: + +- `MODIFY` permission on the table. +- `CREATE` permission on the parent schema. +- `USAGE` permission on the parent schema. +- `USAGE` permission on the parent database. + +To drop an aggregating index, the role requires: + +- `MODIFY` permission on the table. +- `USAGE` permission on the parent schema. +- `USAGE` permission on the parent database. + +## [](#examples-of-modifying-table-permissions)Examples of modifying table permissions + +The following examples use [`GRANT`](/sql_reference/commands/access-control/grant.html) to grant permissions. You can also replace `GRANT` with [REVOKE](/sql_reference/commands/access-control/revoke.html) in any of the examples to remove any granted privileges. + +### [](#select-permission)SELECT permission + +The following code example [grants](/sql_reference/commands/access-control/grant.html) the role `developer_role` permission to read data from the `games` table: + +``` +GRANT SELECT ON TABLE games TO developer_role; +``` + +### [](#insert-permission)INSERT permission + +The following code example gives the role `developer_role` permissions to [insert](/sql_reference/commands/data-management/insert.html) rows into the `games` table: + +``` +GRANT INSERT ON TABLE games TO developer_role; +``` + +### [](#modify-permission)MODIFY permission + +The following code example grants the role `developer_role` permission to alter or drop the `games` table: + +``` +GRANT MODIFY ON TABLE games TO developer_role; +``` + +### [](#delete-permission)DELETE permission + +The following code example gives the role `developer_role` permission to [delete](/sql_reference/commands/data-management/delete.html) rows or partitions from the `games` table: + +``` +GRANT DELETE ON TABLE games TO developer_role; +``` + +### [](#update-permission)UPDATE permission + +The following code example grants the role `developer_role` permission to 
[update](/sql_reference/commands/data-management/update.html) rows in the `games` table: + +``` +GRANT UPDATE ON TABLE games TO developer_role; +``` + +### [](#truncate-permission)TRUNCATE permission + +The following code example gives the role `developer_role` permission to [truncate](/sql_reference/commands/data-management/truncate-table.html) the `games` table, removing all rows: + +``` +GRANT TRUNCATE ON TABLE games TO developer_role; +``` + +### [](#vacuum-permission)VACUUM permission + +The following code example grants the role `developer_role` permission to run the [`VACUUM`](/sql_reference/commands/data-management/vacuum.html) operation on the `games` table: + +``` +GRANT VACUUM ON TABLE games TO developer_role; +``` + +### [](#all-permissions)ALL permissions + +The following code example grants the role `developer_role` with all permissions on the table `games`: + +``` +GRANT ALL ON TABLE games TO developer_role; +``` + +## [](#considerations)Considerations + +- Use the [REVOKE](/sql_reference/commands/access-control/revoke.html) statement to remove any granted privileges. Replace [`GRANT`](/sql_reference/commands/access-control/grant.html) with [`REVOKE`](/sql_reference/commands/access-control/revoke.html) in the examples above. +- Table-level permissions apply only to the specified table. For broader control, consider granting schema-level privileges. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_view_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_view_permissions.md new file mode 100644 index 0000000..a131472 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_database_permissions_view_permissions.md @@ -0,0 +1,45 @@ +# [](#view-permissions)View permissions + +In Firebolt, **views** are objects that allow users to query data from one or more underlying tables or views. Permissions on these views determine who can interact with the view and what actions they can perform. + +To interact with a view, roles must also have **USAGE** permissions on the parent schema and the parent database. + +## [](#view-level-privileges)View-level privileges + +Privilege Description GRANT Syntax REVOKE Syntax   SELECT Allows selecting data from a view. `GRANT SELECT ON VIEW TO ;` `REVOKE SELECT ON VIEW FROM ;`   MODIFY Allows modifying and dropping a view. `GRANT MODIFY ON VIEW TO ;` `REVOKE MODIFY ON VIEW FROM ;`   ALL \[PRIVLEGES] Grants all privileges over the view to a role. `GRANT ALL ON VIEW TO ;` `REVOKE ALL ON VIEW FROM ;`   + +Views are created at the schema level. To grant privileges to create views, refer to the [schema-level privileges documentation](/Overview/Security/Role-Based%20Access%20Control/database-permissions/schema-permissions.html). + +## [](#examples-of-granting-view-permissions)Examples of granting view permissions + +### [](#select-permission)SELECT permission + +To allow querying data from a view, the role must have **SELECT** privileges on the view. Additionally, the **view owner** must have **SELECT** privileges on all underlying tables or views referenced within the view. 
+ +The following examples [grant](/sql_reference/commands/access-control/grant.html) the role `read_role` permission to query data from the `viewtest` view and ensure the `view_owner` has the necessary permission to read data from the `referenced_table` table, allowing the view to function correctly. + +``` +-- Grant SELECT on the view to a user: +GRANT SELECT ON VIEW "viewtest" TO read_role; + +-- Grant SELECT on the referenced table to the view owner: +GRANT SELECT ON TABLE "referenced_table" TO view_owner; +``` + +If the **view owner** loses access to any of these referenced objects, users with **SELECT** on the view will no longer be able to query it, even if their **SELECT** privilege remains. + +### [](#modify-permission)MODIFY permission + +The following code example grants the role `developer_role` permission to alter or drop the `my_view` view: + +``` +GRANT MODIFY ON VIEW my_view TO read_role; +``` + +### [](#all-permissions)ALL permissions + +The following code example grants the role `developer_role` with all permissions over the `my_view` view: + +``` +GRANT ALL ON VIEW my_view TO read_role; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_engine_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_engine_permissions.md new file mode 100644 index 0000000..41b79af --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_engine_permissions.md @@ -0,0 +1,54 @@ +# [](#engine-permissions)Engine permissions + +In Firebolt, an **engine** is a compute resource that processes data and serves queries. Engines provide **full workload isolation**, allowing multiple workloads to run independently while sharing access to the same data. Engines are also **decoupled from databases**, which means: + +- An engine can connect to multiple databases. +- A database can be accessed by multiple engines. 
+ +The following table outlines the privileges that can be granted for engines within a particular account: + +Privilege Description GRANT Syntax REVOKE Syntax USAGE Allows using an engine to run queries. `GRANT USAGE ON ENGINE TO ;` `REVOKE USAGE ON ENGINE FROM ;` OPERATE Allows stopping and starting an engine. `GRANT OPERATE ON ENGINE TO ;` `REVOKE OPERATE ON ENGINE FROM ;` MODIFY Allows altering engine properties or dropping the engine. `GRANT MODIFY ON ENGINE TO ;` `REVOKE MODIFY ON ENGINE FROM ;` MONITOR \[USAGE] Enables the tracking of engine queries through the `engine_running_queries` view for active queries and the `engine_query_history` view for past queries in `information_schema`. `GRANT MONITOR USAGE ON ENGINE TO ;` `REVOKE MONITOR USAGE ON ENGINE FROM ;` ALL \[PRIVILEGES] Grants all privileges over the engine to a role. `GRANT ALL ON ENGINE TO ;` `REVOKE ALL ON ENGINE FROM ;` + +If a user lacks **USAGE** and **OPERATE** privileges for an engine, they will not be able to select or interact with the engine via the Firebolt UI. 
+ +## [](#examples-of-granting-engine-permissions)Examples of granting engine permissions + +### [](#usage-permission)USAGE permission + +The following code example grants the role `developer_role` permission to use the `myEngine` engine for executing queries: + +``` +GRANT USAGE ON ENGINE "myEngine" TO developer_role; +``` + +### [](#operate-permission)OPERATE permission + +The following code example gives the role `developer_role` permission to start and stop the `myEngine` engine: + +``` +GRANT OPERATE ON ENGINE "myEngine" TO developer_role; +``` + +### [](#modify-permission)MODIFY permission + +The following code example grants the role `developer_role` permission to alter properties or drop the `myEngine` engine: + +``` +GRANT MODIFY ON ENGINE "myEngine" TO developer_role; +``` + +### [](#monitor-usage-permission)MONITOR \[USAGE] permission + +The following code example grants the role `developer_role` permission to see the query history and currently running queries for the engine `myEngine`: + +``` +GRANT MONITOR USAGE ON ENGINE "myEngine" TO developer_role; +``` + +### [](#all-permissions)ALL permissions + +The following code example grants the role `developer_role` with all engine permissions on `myEngine`: + +``` +GRANT ALL ON ENGINE "myEngine" TO developer_role; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_ownership.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_ownership.md new file mode 100644 index 0000000..576c132 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_ownership.md @@ -0,0 +1,62 @@ +# [](#ownership)Ownership + +When you create an object in Firebolt, you become its owner. As the owner, you have full control and can perform all operations on the object without needing additional privileges. This allows you to use objects immediately after creating them. 
+ +As the owner of an object, you can do the following: + +- Grant privileges on the object to any role. +- Grant roles you own to other users or roles without needing administrator permissions. + +## [](#supported-object-types)Supported object types + +The following object types support ownership: + +- **Role** +- **User** +- **Engine** +- **Database** +- **Schema** +- **Table** +- **View** + +The current owner of an object can be viewed in the corresponding `information_schema` view: + +Role Name View Role N/A User `information_schema.users` Database `information_schema.catalogs` Engine `information_schema.engines` Schema `information_schema.schemata` Table `information_schema.tables` View `information_schema.views` or `information_schema.tables` + +Indexes inherit ownership from their parent table. In `information_schema.indexes`, the table owner is displayed as the index owner. + +## [](#changing-an-objects-owner)Changing an object’s owner + +The following code example shows how to transfer the ownership of an object: + +``` +ALTER OWNER TO +``` + +## [](#examples-of-updating-ownership-permissions)Examples of updating ownership permissions + +The following are examples of how to change the ownership of different object types. + +### [](#changing-object-ownership)Changing object ownership + +The following code example uses `ALTER` to transfer ownership of a database, engine, role, user, schema, table, and view to a new owner: + +``` +ALTER DATABASE db OWNER TO new_owner +ALTER ENGINE eng OWNER TO new_owner +ALTER ROLE r OWNER TO new_owner +ALTER USER u OWNER TO new_owner +ALTER SCHEMA public OWNER TO new_owner +ALTER TABLE t OWNER TO new_owner +ALTER VIEW v OWNER TO new_owner +``` + +### [](#dropping-users-that-own-objects)Dropping users that own objects + +Before dropping a user who owns objects, you must either drop the objects owned by the owner or transfer ownership of them to another user. 
+ +The following code example shows how to drop a table that has dependent views not owned by the table’s owner using the `CASCADE` parameter to enforce the drop: + +``` +DROP TABLE CASCADE; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management.md new file mode 100644 index 0000000..173f4e6 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management.md @@ -0,0 +1,10 @@ +# [](#role-management-in-firebolt)Role management in Firebolt + +Role management in Firebolt allows you to control access to resources within your data warehouse by assigning specific privileges to users and roles. Firebolt supports two types of roles: [system roles](/Overview/Security/Role-Based%20Access%20Control/role-management/system-roles.html) and [custom roles](/Overview/Security/Role-Based%20Access%20Control/role-management/custom-roles.html). + +Privileges can be granted to custom roles either by the [account\_admin](/Overview/organizations-accounts.html#account-administrative-role) or the [resource owner](/Overview/Security/Role-Based%20Access%20Control/ownership.html). 
+ +* * * + +- [Default System Roles](/Overview/Security/Role-Based%20Access%20Control/role-management/system-roles.html) +- [Custom Roles](/Overview/Security/Role-Based%20Access%20Control/role-management/custom-roles.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_custom_roles.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_custom_roles.md new file mode 100644 index 0000000..7c62835 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_custom_roles.md @@ -0,0 +1,7 @@ +# [](#custom-roles)Custom roles + +In Firebolt, custom roles can be created at the account level to manage access control and permissions. Only users with the [account\_admin](/Overview/organizations-accounts.html#account-administrative-role) system role or those granted the [CREATE ROLE](/sql_reference/commands/access-control/create-role.html) privilege can create custom roles. Once created, custom roles can be assigned to any user or existing role by the `account_admin` or the resource owner. + +Privileges can be granted to custom roles by `account_admin` or the [resource owner](/Overview/Security/Role-Based%20Access%20Control/ownership.html). For example, the owner of a table can grant `SELECT` privileges on that table to a custom role. + +For more information about creating roles, see [creating a custom role via SQL or the Firebolt UI](/Guides/managing-your-organization/managing-users.html#create-a-role). 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_system_roles.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_system_roles.md new file mode 100644 index 0000000..0c0ea72 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_management_system_roles.md @@ -0,0 +1,15 @@ +# [](#default-system-roles)Default system roles + +In Firebolt, **system-defined** roles are automatically created for each organization and account. These roles provide predefined privileges and serve specific purposes. While system-defined roles **cannot** be modified or dropped, you can grant them additional privileges as needed. + +## [](#organization-system-roles)Organization system roles + +Role Name Description organization\_admin Enables all the permissions and the ability to manage the organization. + +The [organization\_admin](/Overview/organizations-accounts.html#organizational-administrative-role) role cannot be granted using SQL. It can only be granted using the [Firebolt Workspace](https://go.firebolt.io/signup) user interface (UI). To manage resources at the organization level, you must assign the `organization_admin` role to your login using the UI. + +## [](#account-system-roles)Account system roles + +Role Name Description public Includes `USAGE` on all databases and both `USAGE` and `CREATE` on every public schema. system\_admin Enables managing databases, engines, schemas, tables, and views. This includes setting database and engine properties as well as access to the observability functionality on all engines. account\_admin Grants full permissions to manage the organization. + +By default, every newly created user is granted the [public](/Overview/organizations-accounts.html#public-role) role. You can also revoke this role from a user. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_permissions.md new file mode 100644 index 0000000..0eec590 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_role_permissions.md @@ -0,0 +1,17 @@ +# [](#role-permissions)Role permissions + +In Firebolt, a [role](/Overview/organizations-accounts.html#roles) is a group of permissions that can have privileges assigned to it. You can [grant](/sql_reference/commands/access-control/grant.html#grant-role) a role to another role or to users. + +The following table outlines the privileges that can be granted for roles within a particular account: + +Privilege Description GRANT Syntax REVOKE Syntax MODIFY Grants the ability to drop the specified role. `GRANT MODIFY ON ROLE TO ;` `REVOKE MODIFY ON ROLE FROM ;` + +## [](#examples-of-granting-role-permissions)Examples of granting role permissions + +### [](#modify-permission)MODIFY permission + +The following code example grants the role `developer_role` permission to drop the `my_role` role: + +``` +GRANT MODIFY ON ROLE my_role TO developer_role; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_user_permissions.md b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_user_permissions.md new file mode 100644 index 0000000..4fc2d2b --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_role_based%20access%20control_user_permissions.md @@ -0,0 +1,19 @@ +# [](#user-permissions)User permissions + +In Firebolt, a [user](/Overview/organizations-accounts.html#users) is associated with a [login](/Guides/managing-your-organization/managing-logins.html) or [service account](/Guides/managing-your-organization/service-accounts.html), which grants them 
access to that account. You can assign a [role](/Overview/organizations-accounts.html#roles) to a user, and the role determines the specific actions they are authorized to perform within the account. + +The following table outlines the privileges that can be granted for users within a particular account: + +Privilege Description GRANT Syntax REVOKE Syntax MODIFY Grants the ability to drop the specified user. `GRANT MODIFY ON USER TO ;` `REVOKE MODIFY ON USER FROM ;` + +Users can modify most of their own account settings without requiring [RBAC](/Overview/Security/Role-Based%20Access%20Control/#role-based-access-control-rbac) permissions, except when altering [LOGIN](/Guides/managing-your-organization/managing-logins.html) configurations or a [SERVICE ACCOUNT](/Guides/managing-your-organization/service-accounts.html). + +## [](#examples-of-granting-user-permissions)Examples of granting user permissions + +### [](#modify-permission)MODIFY permission + +The following code example grants the role `developer_role` permission to drop the `my_user` user: + +``` +GRANT MODIFY ON USER my_user TO developer_role; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/overview_security_security.md b/cmd/docs-scrapper/fireboltdocs/overview_security_security.md new file mode 100644 index 0000000..34163de --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/overview_security_security.md @@ -0,0 +1,197 @@ +# [](#security)Security + +Firebolt employs a layered security strategy to deliver a secure and trusted cloud data warehouse tailored to modern enterprise needs. By integrating advanced security features and industry best practices, Firebolt ensures that its security practices are compliant with security standards. Your data is protected, remains secure, and is accessible only to authorized individuals. Access to objects and resources is managed through accounts, establishing clear boundaries to isolate and secure data. 
+ +Firebolt’s layered security model has the following key areas: + +- [Security](#security) + + - [Network security](#network-security) + - [Identity management](#identity-management) + + - [Single sign-On (SSO)](#single-sign-on-sso) + - [Multi-factor authentication (MFA)](#multi-factor-authentication-mfa) + - [Access control](#access-control) + + - [Roles](#roles) + - [Users](#users) + - [Objects and permissions](#objects-and-permissions) + - [Data protection](#data-protection) + + - [Data at rest](#data-at-rest) + - [Data in motion](#data-in-motion) + - [Secure communication protocols](#secure-communication-protocols) + - [AWS PrivateLink](#aws-privatelink) + - [HIPAA compliance](#hipaa-compliance) + +## [](#network-security)Network security + +Firebolt ensures secure data transmission by implementing end-to-end encryption with Transport Layer Security (TLS) version 1.2, protecting data as it moves between end users and the cloud service. + +Firebolt supports the creation of custom [network policies](/Guides/security/network-policies.html), adding an extra layer of security to your applications. This functionality allows administrators to exercise fine-grained control over which IP ranges can access Firebolt. + +In Firebolt, network policies contain `allowed_IP_list` and `blocked_IP_list` properties that capture definition of IP address ranges. Each property is a list that can contain one or more IP ranges. + +Firebolt supports individual and programmatic access through the following: + +- [Login](/Guides/managing-your-organization/managing-logins.html) - A login object represents an individual user, identified by an email address, who will authenticate by verifying their identity to access Firebolt. +- [Service account](/Guides/managing-your-organization/service-accounts.html) - A service account object is used to represent a machine or application that will authenticate and interact with Firebolt without human intervention. 
+ +**Example** + +The following code example creates a network policy `my_network_policy` with a description that allows only two IP addresses: + +``` +CREATE NETWORK POLICY IF NOT EXISTS my_network_policy WITH ALLOWED_IP_LIST = (‘4.5.6.1’, ‘2.4.5.1’) +DESCRIPTION = 'my new network policy' +``` + +A network policy can be attached to [an organization](/Guides/managing-your-organization/creating-an-organization.html), individual logins, and service accounts. + +**Example** + +The following code example applies `my_network_policy` to `my_organization` and to the login associated with `kate@acme.com`, which restricts access according to the rules of that policy: + +``` +ALTER ORGANIZATION my_organization SET NETWORK_POLICY = my_network_policy; +ALTER LOGIN 'kate@acme.com' SET NETWORK_POLICY = my_network_policy; +``` + +For more information, see [network policies](/Guides/security/network-policies.html). + +## [](#identity-management)Identity management + +Identity management is a multi-step verification process designed to ensure that only authorized individuals, services, and applications can access organizational resources. Identity management involves both identifying users and verifying their identity through authentication. + +Firebolt uses [Auth0](https://auth0.com/) as its identity provider for managing customer registration. All authentication data stored in Auth0 is protected with industry-standard cryptography. Authentication information is securely exchanged using the [SAML 2.0 protocol](https://auth0.com/intro-to-iam/what-is-saml). + +Firebolt provides the SSO and MFA authentication methods. + +### [](#single-sign-on-sso)Single sign-On (SSO) + +[Single Sign-On (SSO)](/Guides/security/sso/) is an authentication method that allows users to access multiple applications or services using a single set of login credentials, simplifying the authentication process and improving security through centralized identity management. 
Firebolt uses SSO to simplify and streamline implementation of secure access to its platform, enhancing the overall security posture and protecting against unauthorized access and data breaches. SSO configuration is accessible to users with the `org_account` built-in role. + +### [](#multi-factor-authentication-mfa)Multi-factor authentication (MFA) + +[MFA](/Guides/security/enabling-mfa.html) strengthens security by requiring users to provide multiple forms of authentication to access their accounts. Many industries have compliance and regulatory standards that require the use of MFA for securing certain types of data and systems. Firebolt fully supports these standards by offering MFA configuration and implementation directly linked to the login object, ensuring secure and compliant access control. + +**Example** + +The following code example creates a login for a user, enables password-based authentication, and requires MFA to authenticate: + +``` +CREATE LOGIN "kate@acme.com" WITH +FIRST_NAME = 'Kate' +LAST_NAME = 'Peterson' +IS_PASSWORD_ENABLED = TRUE +IS_MFA_ENABLED = TRUE; +``` + +## [](#access-control)Access control + +Access control ensures that users have the necessary and appropriate permissions to engage with Firebolt’s system or resources. Firebolt implements [role-based access control](/Guides/security/rbac.html) (RBAC) to manage permissions. + +The RBAC model is centered around the following principles: + +- All objects can be secured. +- Every supported statement requires explicit permission, preventing unauthorized actions. +- The RBAC model is composable, which means that a user’s total permissions are the result of combining all the roles assigned to them, giving them the collective access granted by each role. +- Roles are hierarchical and allow permissions to be inherited through role relationships. + +The RBAC model contains the following: + +- [Roles](#roles) +- [Users](#users) +- [Objects and permissions](#objects-and-permissions). 
+ +### [](#roles)Roles + +A role is a set of permissions assigned to a user or group that defines what actions they are authorized to perform and what resources they can access within Firebolt. Firebolt has the following types of roles: + +1\) **Built-in roles** have a set of pre-defined permissions and custom user-defined roles that can allow for more specific use cases. You can use [GRANT](/sql_reference/commands/access-control/grant.html) and [REVOKE](/sql_reference/commands/access-control/revoke.html) statements to modify permissions for custom roles. Built-in roles become available as soon as a new organization is created, and the first account is set up. + +2\) **User-defined roles** are custom roles that administrators can create to grant a specific set of permissions. + +3\) **System-defined roles** align with common user personas and responsibilities including `public`, which is granted to each new user by default, a `system_admin` role, and an `account_admin` role. For more information about these roles, see [System-defined roles](/Overview/Security/Role-Based%20Access%20Control/role-management/system-roles.html). + +You can create a role by using either the **Firebolt Workspace** or using the [CREATE ROLE](/sql_reference/commands/access-control/create-role.html) SQL statement. + +**Example** + +The following example creates a new role `sales`, which can later be assigned specific permissions and granted to users to inherit those permissions: + +``` +CREATE ROLE sales; +``` + +### [](#users)Users + +Users are linked to either a login or service account in order to gain access to Firebolt. They can be created using the [CREATE USER](/sql_reference/commands/access-control/create-user.html) statement in SQL or through the **Firebolt Workspace**. 
+ +**Example** + +The following code creates a new users `kate` and `bob`: + +``` +CREATE USER kate; +CREATE USER bob; +``` + +**Example** + +The following code example grants the permissions associated with the `sales` role to `kate` and revokes it from `bob`: + +``` +GRANT ROLE sales TO USER kate; +REVOKE ROLE sales FROM USER bob; +``` + +### [](#objects-and-permissions)Objects and permissions + +Permissions in Firebolt define the actions or operations that can be performed, such as managing databases and engines, running queries, or accessing data. Each instance of a securable object, or an object that can be protected by access controls, has specific permissions that are associated with it, controlling what users can do with it. Examples of securable objects include databases, tables, and engines. If there are multiple instances of an engine object, each instance has its own set of predefined permissions. For a full list of available permissions, see [role-based access control](/Overview/Security/Role-Based%20Access%20Control/) + +Any permission that Firebolt supports can be [granted](/sql_reference/commands/access-control/grant.html) or [revoked](/sql_reference/commands/access-control/revoke.html) to or from roles. + +Privileges can be granted or revoked only for roles, not directly for users. Once a role has the necessary permissions, it can then be assigned to users, allowing them to inherit those privileges. 
+ +**Example** + +The following code example grants the `sales` role permission to use the `sales_db` database, allows the role to access any database within `dev_account`, and revokes the ability of the `sales` role to start or stop the `sales_eng` engine: + +``` +GRANT USAGE ON DATABASE sales_db TO sales; -- grants the ability to use sales_db database to the sales role +GRANT USAGE ANY DATABASE ON ACCOUNT dev_account TO sales; -- grants the ability to use any database in dev_account to the sales role +REVOKE OPERATE ON ENGINE sales_eng FROM sales; -- revokes the ability to START and STOP the sales_eng engine from sales role +``` + +## [](#data-protection)Data protection + +Firebolt is firmly committed to data security, privacy, and compliance by ensuring that all data it manages is properly safeguarded and protected through strict encryption standards for data both in motion and at rest. The following security functionality is automatically available to customers: + +### [](#data-at-rest)Data at rest + +By default, all data at rest is encrypted and stored using Amazon Simple Storage Service (S3). All new objects are automatically encrypted using either Amazon S3-managed keys or AWS Key Management Service (KMS) keys, which are securely managed through AWS KMS. + +### [](#data-in-motion)Data in motion + +Firebolt automatically encrypts sensitive data being transmitted between service components, ensuring that it remains secure as it moves across networks and cannot be intercepted by unauthorized parties. + +### [](#secure-communication-protocols)Secure communication protocols + +Firebolt uses secure communication protocols, such as Transport Layer Security (TLS), to provide an additional layer of protection against man-in-the-middle attacks. These attacks occur when an unauthorized party intercepts or alters data as it is transmitted between two points. 
By encrypting data and ensuring secure connections, Firebolt prevents unauthorized access or tampering during data transmission, safeguarding sensitive information as it moves between systems. + +### [](#aws-privatelink)AWS PrivateLink + +AWS PrivateLink enables secure connectivity between your VPC and Firebolt without routing traffic over the public internet. Firebolt supports AWS PrivateLink to provide a private, one-way connection from your VPC to the Firebolt private API, ensuring data confidentiality, integrity, and availability during transit. + +Users with an [account administrator](/Overview/organizations-accounts.html#account-administrative-role) or [organizational administrative](/Overview/organizations-accounts.html#organizational-administrative-role) role can request access to PrivateLink. For more information, see [Request PrivateLink access](/Guides/security/privatelink.html#request-aws-privatelink-access). + +### [](#hipaa-compliance)HIPAA compliance + +HIPAA compliance consists of federal regulations designed to safeguard the privacy and security of patient health information. Firebolt supports HIPAA compliance to ensure the confidentiality, integrity, and availability of electronic protected health information (ePHI) stored within its platform. + +To modify the state of HIPAA compliance for your account, contact the [Firebolt support team](mailto:support@firebolt.io). + +* * * + +- [Role-Based Access Control](/Overview/Security/Role-Based%20Access%20Control/) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/product.md b/cmd/docs-scrapper/fireboltdocs/product.md new file mode 100644 index 0000000..ac2ecaf --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/product.md @@ -0,0 +1,87 @@ +# [](#what-is-firebolt)What is Firebolt? + +Firebolt is a next-generation cloud data warehouse designed for organizations that require lightning-fast analytics at scale. 
Whether you’re dealing with complex data applications, high-concurrency workloads, or low-latency queries, Firebolt empowers you to unlock the full potential of your data without compromising on performance or cost. + +# [](#why-firebolt)Why Firebolt? + +Firebolt is designed specifically to address the demands of modern data workloads, prioritizing **exceptional efficiency** and **low cost** while delivering industry-leading performance for data-intensive applications. + +Firebolt addresses the following challenges: + +- High latency and costly performance during data processing and retrieval. +- Poor query performance under heavy loads. +- Scaling difficulties when managing large datasets. +- Complexity in managing and optimizing data workflows while working with unfamiliar languages. + +The following sections highlight Firebolt’s key benefits, as well as its compatibility with various frameworks and workloads for seamless data integration and processing: + +- [Key benefits](#key-benefits) +- [Eco-system and integrations](#eco-system-and-integrations) + +# [](#key-benefits)Key benefits + +Firebolt is inherently **scalable** helping you to adapt rapidly to changes in your data and workloads. Key benefits include: + +- [High Efficiency](#-high-efficiency) — Achieve exceptional price-to-performance ratios, delivering fast analytics without high costs. +- [Concurrency at Scale](#-concurrency-at-scale) — Run thousands of queries concurrently, maintaining sub-second performance even under heavy loads. +- [Elasticity](#-elasticity) — Seamlessly scale to handle hundreds of terabytes of data without sacrificing speed or efficiency. +- [SQL Simplicity](#-sql-simplicity) — Use a subset of PostgreSQL-compatible SQL, allowing teams to adapt easily with minimal training. 
+ +### [](#-high-efficiency)![Icon for efficiency.](../../assets/images/icon-efficiency.png) High efficiency + +Firebolt delivers low-latency, high performance analytics with one of the best price-to-performance ratios in the industry. Its architecture is optimized for fast query execution through features like vectorized processing and sparse indexing, which minimize data scans and optimize CPU usage. These features enable fast responses to data-intensive queries, even at petabyte scale, without overloading compute resources. Whether analyzing structured or semi-structured data, Firebolt delivers millisecond-level query responses, backed by ACID compliance to ensure data consistency, integrity, and reliability. + +### [](#-concurrency-at-scale)![Icon for concurrency at scale.](../../assets/images/icon-concurrency.png) Concurrency at scale + +Firebolt enables thousands of concurrent queries, ensuring your applications can manage heavy query loads with consistent, reliable performance. Its fine-grained scaling capabilities allow for high query throughput efficiently, even during peak workloads, ensuring optimal resource allocation and minimizing query latency. + +### [](#-elasticity)![Icon for elasticity or vertical, horizontal or concurrent scaling.](../../assets/images/icon-elasticity.png) Elasticity + +Firebolt’s fully decoupled architecture and multi-dimensional elasticity allow compute, storage, and management resources to scale independently, optimizing both performance and cost efficiency. This architecture enables fine-grained control over resources as your workloads evolve, including scaling out to accommodate massive datasets. You can access any database from any engine, giving you flexibility to access any data while offering workload isolation to achieve predictable performance. Furthermore, Firebolt’s system allows fine-grained control over provisioned resources for achieving needed price-performance characteristics and minimizing cost. 
Firebolt’s architecture supports: + +- **Vertical scaling** - Scale up to increase the capacity of your engine to process complex queries for data-intensive workloads. +- **Horizontal scaling** - Scale out by adding more compute nodes to handle higher data processing demands efficiently. +- **Concurrent scaling** - Run multiple clusters within a single engine, which can scale up to ten clusters simultaneously. Firebolt manages concurrency scaling transparently to user applications without requiring any endpoint changes. + +The following diagram includes code examples of how to scale vertically, horizontally or scale for concurrency using SQL in the **Firebolt Workspace**: + +![You can scale vertically, horizontally, or concurrently in the Firebolt Workspace.](../../assets/images/product-scaling-engines.png) + +Firebolt’s multi-dimensional approach to elasticity allows it to dynamically adapt to any workload, ensuring optimal system performance while keeping costs under control. + +### [](#-sql-simplicity)![Icon for SQL simplicity.](../../assets/images/icon-simplicity.png) SQL simplicity + +Firebolt supports a PostgreSQL-compliant SQL dialect, allowing your teams to leverage Firebolt’s capabilities without needing to learn a new query language. This simplifies integration with existing workflows that contain tasks that include data provisioning, processing, and management. With support for both structured and [semi-structured data](/Guides/loading-data/working-with-semi-structured-data/working-with-semi-structured-data.html), Firebolt allows you to analyze diverse datasets within a single platform. + +# [](#eco-system-and-integrations)Eco-system and integrations + +Firebolt’s platform is optimized for integration within modern data workflows. It supports ingesting data efficiently through ELT tools, making it easy to move data from data lakes, relational databases, and other source systems into Firebolt. 
With support for Amazon S3 cloud storage and popular file formats like Avro, Parquet, and ORC, Firebolt allows you to centralize and query your data with ultra-fast analytics, seamlessly fitting into your existing data architecture. + +You can leverage industry-standard tools like Apache Airflow, dbt, and Superset for orchestration and visualization. Firebolt’s SDKs offer wide support for language clients like Python, Node.js, Java, and .NET. This flexibility empowers your team to build and query data using their preferred environments, ensuring smooth data workflows from ingestion to advanced analytics. + +![Firebolt supports popular SDKs and connectors to integrate with many workflows.](../../assets/images/firebolt-framework.png) + +### [](#-workload-isolation-for-smooth-operations)![Icon for workload isolation.](../../assets/images/icon-isolation.png) Workload isolation for smooth operations + +Firebolt optimizes workloads by considering configuration, resource utilization, and history-based statistics to balance both latency and throughput. Any Firebolt engine can handle both read and write operations on any database, ensuring strong consistency across all engines. Workloads are managed independently with dedicated compute resources, allowing you to run complex ELT processes, fast queries, BI reports, among others, without interference. Workload isolation ensures that resource-heavy tasks do not impact your most critical applications and dashboards, enabling smooth operations across diverse use cases. + +# [](#next-steps)Next steps + +Read about Firebolt’s [platform capabilities](/product/product-platform.html). + +# [](#more-firebolt-resources)More Firebolt resources + +Learn how to load and query your data with our [Get Started](/Guides/getting-started/) and [Load data](/Guides/loading-data/loading-data.html) guides or use one of Firebolt’s [test data sets](https://www.firebolt.io/free-sample-datasets). 
+ +[Sign up](https://go.firebolt.io/signup) to access your $200 Firebolt credit for your first 30 days. No credit card needed. + +- See news, blog posts, whitepapers, events, and videos at [Firebolt’s Knowledge center](https://www.firebolt.io/knowledge-center). +- Explore our [competitive pricing options](https://www.firebolt.io/pricing). + +Need help? Firebolt’s support team at [support@firebolt.io](mailto:support@firebolt.io) is here to assist you with: + +- Onboarding support. +- Troubleshooting query performance. +- Optimizing database configurations. +- Addressing data loading issues. +- Getting best practices for data modeling. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/product_product_platform.md b/cmd/docs-scrapper/fireboltdocs/product_product_platform.md new file mode 100644 index 0000000..8835743 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/product_product_platform.md @@ -0,0 +1,33 @@ +# [](#firebolt-platform-capabilities)Firebolt platform capabilities + +## [](#-data-management)![Icon for data management.](../../assets/images/icon-data_management.png) Data management + +Firebolt is built for performance and efficiency, with built-in features that reduce manual effort. Its robust architecture supports parallel data ingestion, ACID-compliant DML, and optimized storage, allowing it to manage large and complex datasets. Automated indexing and backend optimization ensure consistent query performance, even during data modifications like inserts, updates, and deletes. Firebolt also supports both structured and semi-structured data, making it easy to integrate into existing workflows for tasks such as table creation, data ingestion, and transaction management. + +For more information, see [Data management](/Overview/data-management.html). 
+ +## [](#-query-processing)![Icon for query processing.](../../assets/images/icon-processing.png) Query processing + +Firebolt’s query processing is optimized for low latency, high concurrency, and dynamic scaling to meet workload demands. Its optimizer creates efficient execution plans by evaluating data distribution, indexing, and previous query patterns. The distributed runtime system leverages multithreading and tiered caching for faster processing, while resource-aware scheduling and efficient memory management enhance cluster performance. This setup ensures fast, consistent query execution, even for complex queries and large datasets. + +You can also use the Firebolt API for programmatic access to performing tasks including querying, managing resources, and automating workflows. + +For more information, see the [Firebolt API](/Guides/query-data/using-the-api.html), and the Firebolt [functions glossary](/sql_reference/functions-reference/functions-glossary.html). + +## [](#-security)![Icon for security.](../../assets/images/icon-security.png) Security + +Firebolt employs a multi-layered approach to data protection, utilizing industry-standard encryption, secure key management, and granular access control to meet the needs of organizations that build [large, data-intensive applications and products](https://www.firebolt.io/knowledge-center/case-studies). With HIPAA and SOC 2 compliance, Firebolt ensures the confidentiality, integrity, and availability of your data. Identity management combines username/password authentication, Single Sign-On (SSO), and Multi-Factor Authentication (MFA) to secure access to resources. Role-Based Access Control (RBAC) assigns permissions through hierarchical and composable roles, allowing only authorized users to access critical data and system resources. 
+ +![Firebolt's security layers include access control, identity management, infrastructure and network security.](../../assets/images/firebolt-security-layers.png) + +For more information, see [Security](/Overview/Security/security.html). + +## [](#-observability)![Icon for observability.](../../assets/images/icon-observability.png) Observability + +Firebolt’s observability features provide detailed metrics on CPU, RAM, disk usage, and cache efficiency through the `engine_metrics_history` and `engine_running_queries` views. These metrics help optimize engine configurations and resource allocation. Firebolt also integrates with OpenTelemetry, enabling users to track telemetry data for deeper insights into performance across distributed systems. This integration enhances observability and supports data-driven adjustments. Access to these tools is managed through Role-Based Access Control (RBAC), ensuring secure management of system resources. + +## [](#-collaborative-workspace)![Icon for collaboration.](../../assets/images/icon-collaboration.png) Collaborative workspace + +Delivering insights and data products requires collaboration among multiple roles, including data architects, engineers, developers, and IT operations. The **Firebolt Workspace** facilitates team coordination by providing visibility across the entire data lifecycle. It includes dedicated areas for security and governance, data modeling, exploration, SQL development, and performance management, ensuring each role can contribute effectively. + +In the Firebolt **Develop Space**, you can edit and run SQL scripts, manage databases, and view query results. This space features a document editor with auto-complete and script templates. You can also rename, copy, or export scripts, and execute SQL snippets or entire scripts with real-time result display. The **Develop Space** interface supports light and dark modes and allows for exporting query results for further use. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference.md b/cmd/docs-scrapper/fireboltdocs/reference.md new file mode 100644 index 0000000..8ce4fd7 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference.md @@ -0,0 +1,12 @@ +# [](#general-reference)General reference + +Get product updates from release notes, and general reference material for advanced Firebolt specifications. + +* * * + +- [Release notes](/Reference/release-notes/release-notes.html) +- [Available regions](/Reference/available-regions.html) +- [System settings](/Reference/system-settings.html) +- [Object identifiers](/Reference/object-identifiers.html) +- [Reserved words](/Reference/reserved-words.html) +- [Help menu](/Reference/help-menu.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_available_regions.md b/cmd/docs-scrapper/fireboltdocs/reference_available_regions.md new file mode 100644 index 0000000..da967e5 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_available_regions.md @@ -0,0 +1,7 @@ +# [](#available-aws-regions)Available AWS regions + +Firebolt is available in the following AWS regions: + +Region name Region ID US West (Oregon) us-west-2 US East (N. Virginia) us-east-1 Europe (Frankfurt) eu-central-1 Europe (Ireland) eu-west-1 Asia Pacific (Singapore) ap-southeast-1 + +To request additional regions, contact Firebolt’s support team at [support@firebolt.io](mailto:support@firebolt.io). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_help_menu.md b/cmd/docs-scrapper/fireboltdocs/reference_help_menu.md new file mode 100644 index 0000000..1813644 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_help_menu.md @@ -0,0 +1,108 @@ +# [](#help-menu)Help menu + +The **Firebolt Help menu** is found in the bottom left corner of the screen, appearing as a question mark (`?`).
+ +![Help Menu](/assets/images/getting_to_help.png) + +## [](#menu-options)Menu options + +The help menu has the following options: + +- Status Page +- Release Notes +- Contact Support +- Firebolt Support Access +- Documentation + +### [](#status-page)Status page + +The status page allows you to view the operational status of systems in real time, as well as descriptions of historical incidents. + +You can subscribe to notifications whenever Firebolt **creates**, **updates**, or **resolves** an incident through the following options: + +1. **Email notification** - by providing your email address. +2. **Text notifications** - by providing your phone number. *You will only receive text notifications when Firebolt either creates or resolves an incident.* +3. **Slack notifications** - by connecting via Slack. *You can receive maintenance status updates through Slack* +4. **RSS updates** - by connecting via your RSS/Atom application. + +### [](#release-notes)Release Notes + +The Release Notes link brings you to Firebolt’s [latest version release notes](/Reference/release-notes/release-notes.html). + +### [](#contact-support---reaching-out-to-firebolt-support)Contact Support - Reaching out to Firebolt Support + +**Contact Support** allows you to create a support case for Firebolt’s support team. + +To create a case: + +1. Select **Contact Support**. +2. A support form will appear, with the following information populated **automatically**: + + - **First Name** + - **Last Name** + - **Organization** + - **Email** +3. Fill in the following: + + - **Account** - Select the name of your account within your organization. + - **Severity** - Select the severity of your case according to the following categories: + + - *Critical*: You are currently experiencing a loss of critical functionality. For example, your engine is unable to start. + - *Urgent*: Your critical functionality is being intermittently impacted. For example, your engine intermittently becomes unresponsive. 
- *Tolerable*: Your services remain operational, though non-critical functionality may be impacted. For example, a specific SQL query generates an error. + - *Question*: Your operations are running smoothly with no disruptions. You may have a question or want to report a minor issue with the user interface. + - **Engine name** - Enter the name of the engine with the issue. + - **Subject** - Provide a clear and descriptive subject. + - **Description** - Provide any relevant details including the following: + + - What were you trying to do when the issue occurred? + - What was your expected outcome? + - What actually happened? + - What errors were returned? +4. Select **Submit**. +5. Your case will be sent to our Support team, and you will receive a confirmation email. + +### [](#firebolt-support-access)Firebolt Support Access + +Firebolt Support Access allows you to manage and control the level of access that Firebolt’s support team has to your account. + +For new accounts, support access is enabled by default; however, you can revoke it at any time. + +All support team activity is logged and can be viewed in the [query history](../sql_reference/information-schema/engine-query-history). + +To grant access to the support team: + +1. Select “Firebolt Support Access” +2. A support access form will appear, with the following information populated **automatically**: + + - **Account Name** + - **Duration** + - **Assign Roles** +3. Fill in the following: + + - **Duration** - Specify the duration for which you’d like to grant access to the support team. Once this period ends, access will be automatically revoked. + - **Assign Roles** - Select the role(s) that you want to assign to the support team. Their access will be limited to these roles. You can create a dedicated role by following the [instructions here](/Overview/Security/Role-Based%20Access%20Control/role-management/custom-roles.html). Note that the support team will have full access to data at the organization level.
+4. Select **Grant Access**. + +Once access is granted, an additional icon will appear in the bottom left corner of the screen, indicating that Firebolt’s support team has access to your account, as shown below: + +![Support Access](/assets/images/support_access_menu.png) + +To edit support team access: + +1. Select the icon that appeared when you granted access to the support team. You can find this icon in the bottom left corner of your **Firebolt Workspace**. +2. Select **Manage access**. You can also open the same screen by selecting **Firebolt Support Access** in the **Help** menu. +3. Select **Edit Access**. +4. After you have made your changes, select **Update Access**. + +To revoke support team access: + +1. Select the icon that appeared when you granted access to the support team. You can find this icon in the bottom left corner of your **Firebolt Workspace**. +2. Select **Manage access**. You can also open the same screen by selecting **Firebolt Support Access** in the **Help** menu. +3. Select **Revoke Access**. + +Once revoked, the support access icon will disappear from the bottom left corner of the screen. + +### [](#documentation)Documentation + +The Documentation link brings you to Firebolt’s docs (where you are now!) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_interval_arithmetic.md b/cmd/docs-scrapper/fireboltdocs/reference_interval_arithmetic.md new file mode 100644 index 0000000..c1a1863 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_interval_arithmetic.md @@ -0,0 +1,96 @@ +# [](#arithmetic-with-intervals)Arithmetic with intervals + +This topic describes the Firebolt implementation of arithmetic with intervals. 
+ +- [Overview](#overview) +- [Literal string interpretation](#literal-string-interpretation) + + - [Format examples](#format-examples) +- [Arithmetic between interval and TIMESTAMPTZ](#arithmetic-between-interval-and-timestamptz) +- [Examples](#examples) + +## [](#overview)Overview + +An `interval` represents a duration. In Firebolt, values of type `interval` can be used to add or subtract a duration to/from a date or timestamp. `Interval` cannot be used as the data type of a column. + +The `+` and `*` operators shown below come in commutative pairs (e.g., both `DATE + interval` and `interval + DATE` are accepted). + +Operator Description `DATE + interval -> TIMESTAMP` Add an `interval` to a `DATE` `DATE - interval -> TIMESTAMP` Subtract an `interval` from a `DATE` `TIMESTAMP + interval -> TIMESTAMP` Add an `interval` to a `TIMESTAMP` `TIMESTAMP - interval -> TIMESTAMP` Subtract an `interval` from a `TIMESTAMP` `TIMESTAMPTZ + interval -> TIMESTAMPTZ` Add an `interval` to a `TIMESTAMPTZ` `TIMESTAMPTZ - interval -> TIMESTAMPTZ` Subtract an `interval` from a `TIMESTAMPTZ` `interval * DOUBLE PRECISION -> interval` Multiply an `interval` by a scalar + +## [](#literal-string-interpretation)Literal string interpretation + +`Interval` literals can be specified in two formats. + +### [](#format-examples)Format examples + +**Cast from text literal** +The following examples demonstrate different ways to cast a text literal to an interval in SQL. Each method converts a string representing a time interval into the `INTERVAL` data type using various casting syntaxes. + +Using interval keyword: + +``` +interval 'quantity unit [quantity unit...] [direction]' +``` + +Using double colon: + +``` +'quantity unit [quantity unit...] [direction]'::interval +``` + +Using `CAST` function: + +``` +CAST('quantity unit [quantity unit...] [direction]' AS interval) +``` + +In these examples, `direction` can be either `ago` or left empty. Using `ago` negates all specified quantities. 
The `quantity` represents a signed or unsigned integer, and `unit` refers to one of the following time units, matched case-insensitively: + +Unit microsecond\[s] / us millisecond\[s] / ms second\[s] / s minute\[s] / m hour\[s] / h day\[s] / d week\[s] / w month\[s] / mon\[s] year\[s] / y decade\[s] / dec\[s] century / centuries / c millennium\[s] / mil\[s] + +Each `unit` can appear only once in an interval literal. The value of the interval is determined by adding the quantities of the specified units with the appropriate signs. + +**Unit outside of text literal** +The following example demonstrates how to cast a numeric value to an interval by placing the unit outside of the text literal. + +``` +interval 'N' unit +``` + +In this format, `N` represents a signed or unsigned integer, and `unit` specifies the time unit, matched case-insensitively, from the following options: + +Unit second minute hour day week month year + +## [](#arithmetic-between-interval-and-timestamptz)Arithmetic between interval and TIMESTAMPTZ + +Interval arithmetic with `TIMESTAMPTZ` values works as follows: + +1. Convert the `TIMESTAMPTZ` value from Unix time to local time according to the rules of the time zone specified by the session’s `time_zone` setting. +2. Add the `millennium`, `century`, `decade`, `year`, `month`, `week` and `day` components of the interval to the local time. +3. Convert the local time back to Unix time according to the rules of the time zone specified by the session’s `time_zone` setting. +4. Add the `hour`, `minute`, `second`, `millisecond`, and `microsecond` components of the interval to the Unix time. + +The back and forth between Unix time and local time is necessary to handle the fact that not all days consist of 24 hours due to daylight savings time transitions. 
For instance, `SELECT TIMESTAMPTZ '2022-10-30 Europe/Berlin' + interval '1 day'` returns `2022-10-31 00:00:00+01` but `SELECT TIMESTAMPTZ '2022-10-30 Europe/Berlin' + interval '24 hours'` returns `2022-10-30 23:00:00+01` (assuming the value of the session’s `time_zone` setting is `'Europe/Berlin'`). Still, the dependence on the session’s `time_zone` setting should be kept in mind when doing arithmetic between interval and `TIMESTAMPTZ`. + +### [](#multiplying-an-interval-by-a-scalar)Multiplying an interval by a scalar + +You can use the expression `date_time + INTERVAL * d` where `date_time` is a constant or column reference of type `DATE`, `TIMESTAMP`, or `TIMESTAMPTZ`, and `d` is a constant or column reference of type `DOUBLE PRECISION`. The effect is that the INTERVAL is scaled by `d`, and the resulting INTERVAL is added to `date_time`. E.g., `INTERVAL '1 day' * 3` -> `INTERVAL '3 days'`. + +## [](#examples)Examples + +``` +SELECT DATE '1996-09-03' - interval '1 millennium 5 years 42 day 42 ms'; --> 0991-07-22 23:59:59.958 +SELECT TIMESTAMP '1996-09-03 11:19:42' + interval '10 years 5 months 42 days 7 seconds'; --> 2007-03-17 11:19:49 + +SELECT TIMESTAMP '2023-10-20 11:49:52' + interval '1 year 6 months 4 weeks 7 hours' * 7.5; --> 2035-08-20 16:19:52 +SELECT DATE '2023-10-20' - 42 * interval '1 months 1 day 1 hour'; --> 2020-03-07 06:00:00 + +-- The following example shows a daylight savings time change in the time zone 'Europe/Berlin' +SET time_zone = 'Europe/Berlin'; +SELECT TIMESTAMPTZ '2022-10-30 Europe/Berlin' + interval '1 day'; --> 2022-10-31 00:00:00+01 +SELECT TIMESTAMPTZ '2022-10-30 Europe/Berlin' + interval '24' hour; --> 2022-10-30 23:00:00+01 + +SET time_zone = 'US/Pacific'; +SELECT TIMESTAMPTZ '2022-10-30 Europe/Berlin' + interval '1 day'; --> 2022-10-30 15:00:00-07 +SELECT TIMESTAMPTZ '2022-10-30 Europe/Berlin' + interval '24' hour; --> 2022-10-30 15:00:00-07 +``` \ No newline at end of file diff --git 
a/cmd/docs-scrapper/fireboltdocs/reference_object_identifiers.md b/cmd/docs-scrapper/fireboltdocs/reference_object_identifiers.md new file mode 100644 index 0000000..1b0c651 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_object_identifiers.md @@ -0,0 +1,55 @@ +# [](#object-identifers)Object identifiers + +Firebolt object identifiers are used to refer to database items such as columns, tables, indexes, views, and engines. + +## [](#syntax)Syntax + +Identifiers must contain at least one character, and no more than `255` characters total. + +## [](#unquoted-identifiers)Unquoted identifiers + +Unquoted identifiers must adhere to the following syntax: + +1. The **first character** must be a letter (a-z), or an underscore (`_`). +2. After the first character, **subsequent characters** can include letters, underscores, or digits (0-9). + +Firebolt evaluates unquoted identifiers such as table and column names **entirely in lowercase**. The following queries: + +``` +SELECT my_column FROM my_table +SELECT MY_COLUMN FROM MY_TABLE +SELECT mY_cOlUmn FROM mY_tAbLe +``` + +are all equivalent to: + +``` +SELECT my_column FROM my_table +``` + +You can keep uppercase identifiers by enclosing them in double-quotes. For example, the following identifiers are unique: + +``` +"COLUMN_NAME" +"column_name" +"CoLuMn_NaMe" +``` + +## [](#quoted-identifiers)Quoted identifiers + +Quoted identifiers can contain any UTF-8 characters of the following [Unicode general category values](https://www.unicode.org/reports/tr44/#General_Category_Values): + +1. Any letter in any language, as represented by the Unicode general category value for **Letter**. +2. Any numeric character in any language as represented by the Unicode general category value for **Number**. +3. Special characters beyond standard alphanumeric characters. Examples include `@`, `#`, `-`, `$`, `%`, `?`, and others.
Any object identifier that contains special characters, spaces, or is case-sensitive must be enclosed in double quotes (`"`) as follows: `"my-column"` or `"User@Name"`. +4. Underscores, as represented by the Unicode general category value for **Connector\_Punctuation**. + +## [](#user-names)User names + +User names must conform to the following rules: + +- They must be between 3 and 63 characters in length. +- They can contain alphanumeric characters including upper and lowercase letters and numbers. +- The first and last characters must be either a letter or a digit. +- You can use the following special characters: ``@ ! # $ % & ' * + - = ? ^ _ ` { | } ~ .`` +- You cannot use consecutive dots (..). \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_proof_of_concept_guide.md b/cmd/docs-scrapper/fireboltdocs/reference_proof_of_concept_guide.md new file mode 100644 index 0000000..a9d9c1a --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_proof_of_concept_guide.md @@ -0,0 +1,60 @@ +# [](#firebolt-proof-of-concept-technical-guidelines)Firebolt proof of concept technical guidelines + +> This guide should be used after you have met with your Firebolt designated sales team. If you are looking to try out Firebolt and have yet to contact us, [get started here.](https://www.firebolt.io/getting-started-now) + +- [Step 1 - Provide access to S3](#step-1---provide-access-to-s3) +- [Step 2 - Provide your Firebolt team with relevant assets](#step-2---provide-your-firebolt-team-with-relevant-assets) + +## [](#step-1---provide-access-to-s3)Step 1 - Provide access to S3 + +The data used in the proof of concept should be made available in an S3 region ([any region](/Reference/available-regions.html) is fine). To ensure the proof of concept accurately reflects your production workload, it’s recommended to have all or a significant portion of the data available. + +Firebolt pays for data transfer costs if any occur.
You can find additional information in [this AWS guide.](https://docs.aws.amazon.com/AmazonS3/latest/userguide/RequesterPaysBuckets.html) + +Please go through the following steps to grant Firebolt access to the relevant S3 bucket. You can find additional information in [this AWS guide.](https://docs.aws.amazon.com/AmazonS3/latest/userguide/add-bucket-policy.html) + +1. Connect to the AWS console and click on the relevant S3 bucket. +2. Go to *Permissions* and then scroll down and edit *Bucket Policy*. +3. Copy the following policy. Make sure you replace `&lt;bucket-name&gt;` with the actual bucket name. + + ``` + { + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": { + "AWS": "arn:aws:iam::231290928314:root" + }, + "Action": [ + "s3:GetObject", + "s3:GetObjectTagging", + "s3:PutObject", + "s3:PutObjectTagging", + "s3:ListBucket", + "s3:GetBucketLocation" + ], + "Resource": [ + "arn:aws:s3:::&lt;bucket-name&gt;", + "arn:aws:s3:::&lt;bucket-name&gt;/*" + ] + } + ] + } + + ``` +4. Save changes + +## [](#step-2---provide-your-firebolt-team-with-relevant-assets)Step 2 - Provide your Firebolt team with relevant assets + +Please share the following assets with your Firebolt team through your designated Slack channel or email. + +Please make sure all files are saved as a `sql` file type (i.e., `file_name.sql`) + +1. **Schema** - this file should include the DDL commands that we should use to create the desired database schema. +2. **Sample queries & average durations** - this file should include a set of queries to be executed over the data set. These queries might be: + + - A good representation of queries you usually run + - Queries that are slow and you would like to see improved + + For each query, add a comment in the file describing the average duration, or any additional metric that might be relevant.
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes.md b/cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes.md new file mode 100644 index 0000000..757fe96 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes.md @@ -0,0 +1,39 @@ +# [](#release-notes)Release notes + +Firebolt continuously releases updates so that you can benefit from the latest and most stable service. These updates might happen daily, but we aggregate release notes to cover a longer time period for easier reference. The most recent release notes from the latest version are below. + +- See the [Release notes archive](/Reference/release-notes/release-notes-archive.html) for earlier-version release notes. + +Firebolt might roll out releases in phases. New features and changes may not yet be available to all accounts on the release date shown. + +## [](#firebolt-release-notes---version-418)Firebolt Release Notes - Version 4.18 + +### [](#new-features)New Features + +**Users can now ALTER their corresponding USER object without administrative or RBAC permissions** +Users can now [ALTER](/sql_reference/commands/access-control/alter-user.html) their corresponding [USER](/Overview/organizations-accounts.html#users) object and change its properties without needing role-based access control permissions ([RBAC](/Overview/Security/Role-Based%20Access%20Control/)). This enhancement simplifies user self-management by reducing the dependency on administrative permissions. Restrictions remain for sensitive properties including [logins or service accounts](/Overview/organizations-accounts.html#organizations), which require higher-level permissions. + +**Use a LOCATION object to store credentials for authentication** +You can now use [CREATE LOCATION](/sql_reference/commands/data-definition/create-location.html) to create a `LOCATION` object in your Firebolt account. 
Use `LOCATION` to store credentials and authenticate to external systems without needing to provide static credentials each time you run a query or create a table. `LOCATION` works with ([RBAC](/Overview/Security/Role-Based%20Access%20Control/)) so you can manage permissions securely. You can view detailed information about your locations including source type, URL, description, owner, and creation time in [information\_schema.locations](/sql_reference/information-schema/locations.html). + +**Added creation timestamps for tables, views, indexes, and locations** +Use creation timestamps in `information_schema` views for [tables](/sql_reference/information-schema/tables.html), [views](/sql_reference/information-schema/views.html), [indexes](/sql_reference/information-schema/indexes.html), and [locations](/sql_reference/information-schema/locations.html) to help track objects for data management. + +**Added support for SQL pipe syntax** +Firebolt now supports [SQL Pipe syntax](/sql_reference/commands/queries/pipe.html), an alternative way to structure SQL queries using the `|>` operator. This syntax allows for a linear, step-by-step flow of query transformations, improving readability and simplifying query composition. It supports all standard SQL operations and can be combined with traditional SQL syntax. + +**Added wildcard character functionality to `READ_PARQUET` and `READ_CSV` to simultaneously read multiple files** +You can use wildcard characters such as `*` or `?` to specify a file URL as a [glob pattern](https://en.wikipedia.org/wiki/Glob_%28programming%29) in the [READ\_PARQUET](/sql_reference/functions-reference/table-valued/read_parquet.html) and [READ\_CSV](/sql_reference/functions-reference/table-valued/read_csv.html) table-valued functions to read multiple files simultaneously. This enhancement simplifies managing large datasets by reducing the need to make multiple function calls. 
+ +**Added functionality to transfer ownership of objects in the Firebolt Workspace** +You can now [transfer ownership](/Guides/security/ownership.html#transfer-ownership-using-the-firebolt-workspace) of Firebolt objects through the **Firebolt Workspace** user interface (UI). You can transfer ownership of individual objects or bulk transfer objects owned by a specific user. You can also delete objects individually or in bulk, helping to simplify the management of object ownership within the UI. + +### [](#performance-improvements)Performance Improvements + +**Enabled result and subresult caching for queries with window functions** +Enabled [result and subresult caching](/Overview/queries/understand-query-performance-subresult.html) for queries that contain [window functions](/sql_reference/functions-reference/window/), which can reduce query runtimes by storing previous results and enhance overall query performance and efficiency. + +### [](#bug-fixes)Bug Fixes + +**Fixed an issue where `CREATE VIEW` statements did not preserve the order of named function parameters** +An issue was resolved where [CREATE VIEW](/sql_reference/commands/data-definition/create-view.html) statements did not maintain the correct order of named function parameters, which could lead to syntax errors when querying the view. This fix improves query reliability by ensuring the proper order of function parameters. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes_archive.md b/cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes_archive.md new file mode 100644 index 0000000..435ea8c --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_release_notes_release_notes_archive.md @@ -0,0 +1,1425 @@ +# [](#release-notes-archive)Release notes archive + +We provide an archive of release notes for your historical reference. 
+ +- [Firebolt Release Notes - Version 4.17](#firebolt-release-notes---version-417) + + - [New Features](#new-features) + - [Performance Improvements](#performance-improvements) + - [Bug Fixes](#bug-fixes) +- [Firebolt Release Notes - Version 4.16](#firebolt-release-notes---version-416) + + - [New Features](#new-features-1) + - [Performance Improvements](#performance-improvements-1) + - [Bug Fixes](#bug-fixes-1) +- [Firebolt Release Notes - Version 4.15](#firebolt-release-notes---version-415) + + - [New Features](#new-features-2) + - [Performance Improvements](#performance-improvements-2) + - [Behavior Changes](#behavior-changes) + - [Bug Fixes](#bug-fixes-2) +- [Firebolt Release Notes - Version 4.14](#firebolt-release-notes---version-414) + + - [New Features](#new-features-3) + - [Performance Improvements](#performance-improvements-3) + - [Bug Fixes](#bug-fixes-3) +- [Firebolt Release Notes - Version 4.13](#firebolt-release-notes---version-413) + + - [New Features](#new-features-4) + - [Behavior Changes](#behavior-changes-1) + - [Bug Fixes](#bug-fixes-4) +- [Firebolt Release Notes - Version 4.12](#firebolt-release-notes---version-412) + + - [New Features](#new-features-5) + - [Performance Improvements](#performance-improvements-4) + - [Behavior Changes](#behavior-changes-2) + - [Bug Fixes](#bug-fixes-5) +- [Firebolt Release Notes - Version 4.11](#firebolt-release-notes---version-411) + + - [New Features](#new-features-6) +- [Firebolt Release Notes - Version 4.10](#firebolt-release-notes---version-410) + + - [New Features](#new-features-7) + - [Behavior Changes](#behavior-changes-3) + - [Bug Fixes](#bug-fixes-6) +- [Firebolt Release Notes - Version 4.9](#firebolt-release-notes---version-49) + + - [New Features](#new-features-8) + - [Performance Improvements](#performance-improvements-5) + - [Bug Fixes](#bug-fixes-7) +- [Firebolt Release Notes - Version 4.8](#firebolt-release-notes---version-48) + + - [New Features](#new-features-9) + - [Performance 
Improvements](#performance-improvements-6) + - [Bug Fixes](#bug-fixes-8) +- [DB version 4.7](#db-version-47) + + - [New Features](#new-features-10) + - [Performance Improvements](#performance-improvements-7) + - [Behavior Changes](#behavior-changes-4) + - [Bug Fixes](#bug-fixes-9) +- [DB version 4.6](#db-version-46) + + - [New Features](#new-features-11) + - [Behavior Changes](#behavior-changes-5) + - [Bug Fixes](#bug-fixes-10) +- [DB version 4.5](#db-version-45) + + - [New Features](#new-features-12) + - [Bug Fixes](#bug-fixes-11) +- [DB version 4.4](#db-version-44) + + - [New Features](#new-features-13) + - [Breaking Changes](#breaking-changes) + - [Bug Fixes](#bug-fixes-12) +- [DB version 4.3](#db-version-43) + + - [New Features](#new-features-14) + - [Performance Improvements](#performance-improvements-8) + - [Bug Fixes](#bug-fixes-13) + - [Breaking Changes](#breaking-changes-1) +- [DB version 4.2](#db-version-42) + + - [New features](#new-features-15) + - [Enhancements, changes and new integrations](#enhancements-changes-and-new-integrations) + - [Breaking Changes](#breaking-changes-2) +- [DB version 4.1](#db-version-41) + + - [Resolved issues](#resolved-issues) +- [DB version 4.0](#db-version-40) + + - [Enhancements, changes and new integrations](#enhancements-changes-and-new-integrations-1) + - [Breaking Changes](#breaking-changes-3) +- [DB version 3.34](#db-version-334) + + - [Enhancements, changes and new integrations](#enhancements-changes-and-new-integrations-2) + - [Resolved issues](#resolved-issues-1) +- [DB version 3.33](#db-version-333) + + - [Enhancements, changes and new integrations](#enhancements-changes-and-new-integrations-3) + - [Resolved issues](#resolved-issues-2) +- [DB version 3.32](#db-version-332) + + - [New features](#new-features-16) + - [Enhancements, changes and new integrations](#enhancements-changes-and-new-integrations-4) + - [Resolved issues](#resolved-issues-3) +- [DB version 3.31](#db-version-331) + + - [New 
features](#new-features-17) + - [Enhancements, changes and new integrations](#enhancements-changes-and-new-integrations-5) + - [Resolved issues](#resolved-issues-4) +- [DB version 3.30](#db-version-330) +- [DB version 3.29](#db-version-329) +- [DB version 3.28](#db-version-328) + +## [](#firebolt-release-notes---version-417)Firebolt Release Notes - Version 4.17 + +### [](#new-features)New Features + +**Introduced the `IF` function to enhance query readability and simplify conditional expressions** +The new [`IF`](/sql_reference/functions-reference/conditional-and-miscellaneous/if.html) function simplifies query writing as a more concise alternative to the `CASE WHEN` expression. +You can now use `IF(<condition>, <then>, <else>)` as a shorter equivalent to `CASE WHEN <condition> THEN <then> ELSE <else> END`. + +**Added `INCREMENTAL` index optimization with `VACUUM`** +The [`VACUUM`](/sql_reference/commands/data-management/vacuum.html) statement now supports an `INDEXES = INCREMENTAL` option, allowing incremental optimization of related indexes. This new mode uses fewer resources compared to a full reevaluation, improving index layouts. Although incremental optimization may not achieve the optimal layout of a full reevaluation, it maintains a balance between performance and resource usage. + +**Added `MAX_CONCURRENCY` option to `VACUUM` statement** +The `VACUUM` command now supports the `MAX_CONCURRENCY` option, enabling you to limit concurrent processes during optimization. This allows for control of the number of concurrent processes in a `VACUUM` operation, optimizing resource usage and improving performance in multi-threaded environments. + +**Added longitude wrapping for `GEOGRAPHY` data** +Firebolt now automatically wraps longitude values outside the range of -180 to 180 degrees when parsing `GEOGRAPHY` data from WKT, GeoJSON, WKB, or using the `ST_GeogPoint` function. For example, `POINT(180.5 1)` is now correctly interpreted as `POINT(-179.5 1)`. This improvement simplifies geographic data handling. 
+ +**Enhanced the `EXPLAIN` function to support all SQL statements except for DDL and DCL** +The [`EXPLAIN`](/sql_reference/commands/queries/explain.html) feature now supports analysis of all SQL statements. However, it does not provide output details for DDL (Data Definition Language) and DCL (Data Control Language) statements. + +### [](#performance-improvements)Performance Improvements + +**Optimized `COPY FROM` filtering performance** +Filters applied to pseudo columns, such as `$SOURCE_FILE_NAME` and `$SOURCE_FILE_TIMESTAMP`, are now pushed down to the file listing during the `COPY FROM` process when using multiple URL and pattern locations. This enhancement improves performance by reducing unnecessary data processing and speeds up data loading operations. + +### [](#bug-fixes)Bug Fixes + +**Fixed latitude handling for `LineString` in WKT** +Fixed an issue where latitudes outside the valid range of -90 to 90 degrees, in `LineString` data were incorrectly accepted when parsing from WKT. For example, `LINESTRING(0.5 1, 1 90.5)` now correctly returns an error instead of being interpreted as `LINESTRING(0.5 1, -179 89.5)`. This fix enhances data integrity and prevents erroneous geographic entries. + +## [](#firebolt-release-notes---version-416)Firebolt Release Notes - Version 4.16 + +### [](#new-features-1)New Features + +**Added `MAX_CONCURRENCY` option to the `VACUUM` statement for enhanced concurrency control** +The [VACUUM](/sql_reference/commands/data-management/vacuum.html) statement now includes the `MAX_CONCURRENCY` option, allowing users to limit the number of concurrent streams. This improves control over resource usage during `VACUUM` operations. + +**Introduced the `INDEXES = ALL | NONE` for the `VACUUM` statement** +The [VACUUM](/sql_reference/commands/data-management/vacuum.html) statement now supports the `INDEXES = ALL | NONE` option, giving users control over whether indexes are optimized during `VACUUM` operations. 
+ +**`VACUUM` now runs automatically** +Firebolt now automatically evaluates the data layout of tables and runs [VACUUM](/sql_reference/commands/data-management/vacuum.html) to optimize performance and storage efficiency. After [INSERT](/sql_reference/commands/data-management/insert.html), [UPDATE](/sql_reference/commands/data-management/update.html), or [DELETE](/sql_reference/commands/data-management/delete.html) operations modify data, the engine that performed the operation determines whether `VACUUM` is required. This decision is based on factors such as the number of deleted rows and the need to consolidate storage for faster query performance and reduced disk space usage. + +**Added support for casting text literals to interval literals** +Firebolt now supports casting text literals to interval literals using expressions like `'1 month'::INTERVAL`, making it easier to define time intervals in queries. + +**Added default value support for `GEOGRAPHY` columns** +Firebolt now supports default values for columns with the [GEOGRAPHY](/sql_reference/geography-data-type.html#geography-data-type) data type. For example, `CREATE TABLE geo_table (geo_column GEOGRAPHY DEFAULT 'GEOMETRYCOLLECTION EMPTY')` ensures consistency across database entries when no explicit value is provided. + +**Added `MIN_CLUSTERS` and `MAX_CLUSTERS` columns to `INFORMATION_SCHEMA.ENGINES`** +The [INFORMATION\_SCHEMA.ENGINES](/sql_reference/information-schema/engines.html) table now includes `MIN_CLUSTERS` and `MAX_CLUSTERS` columns, providing visibility into cluster configuration for improved database management. + +**Added support for `STATEMENT_TIMEOUT` to manage query run time limits** +Added support for `STATEMENT_TIMEOUT`. This feature specifies the number of milliseconds a statement is allowed to run. Any statement or query exceeding the specified time is canceled. A value of zero disables the timeout by default. 
Using `STATEMENT_TIMEOUT` helps prevent excessively long-running queries, improving system efficiency and resource use. + +**Added the PostgreSQL function `DATE()` as an alternative to `::DATE`** +Firebolt now supports the `DATE()` function, offering an alternative to the `::DATE` syntax for improved readability and usability in SQL queries. + +**Added support for `FROM` first syntax** +SQL queries can now use `FROM` before `SELECT`, allowing for more flexible query structures such as `FROM t SELECT a, SUM(b) GROUP BY a` or even `FROM t` without a `SELECT` clause. + +**Support for AWS PrivateLink is now in public preview** +[Firebolt now supports AWS PrivateLink](/Guides/security/privatelink.html), allowing Firebolt Enterprise customers to securely access the Firebolt API without exposing traffic to the public internet. AWS PrivateLink enhances security, minimizes data exposure, and improves network reliability by keeping traffic within AWS. + +**Added concurrency auto-scaling** +Engines can now be created with concurrency auto-scaling enabled, or modified to enable concurrency auto-scaling. Setting the `MIN_CLUSTERS` and `MAX_CLUSTERS` parameters on [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html) and [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html) commands turns on concurrency auto-scaling: the engine will dynamically resize between the specified `MIN_CLUSTERS` and `MAX_CLUSTERS` values to match demand. + +**Firebolt introduces three fully managed editions** + +Firebolt now offers **Standard, Enterprise, and Dedicated editions**, each designed for different capabilities, security, and scalability needs. + +- **Standard**: High-performance, elastic scaling – in and out, up and down – for cost-efficient, fully managed analytics on a single cluster. +- **Enterprise & Dedicated**: Includes scaling capabilities like **multi-cluster scaling**, as well as advanced security features like **AWS PrivateLink**. 
+- **Dedicated**: Built for regulated industries (finance, healthcare) with **single-tenant infrastructure** and compliance with **HIPAA, SOC 2, ISO**. + +Enterprise and Dedicated customers also get **24/7 support** with **faster support response times**, **Slack-based support**, and support from a **designated engineer**. For more information on Firebolt’s editions, refer to the [Pricing and billing](/Overview/billing/) page. + +### [](#performance-improvements-1)Performance Improvements + +**Introduced pruning for `GEOGRAPHY` columns at the tablet level to enhance query performance** +Firebolt now prunes [GEOGRAPHY](/sql_reference/geography-data-type.html#geography-data-type) data at the tablet level to enhance query performance. To activate spatial pruning on tables created before this release, run `VACUUM`. For additional details, see our [blog post](https://www.firebolt.io/blog/architecture-and-internal-representation-of-the-geography-data-type). + +**Added `INDEX_GRANULARITY` storage parameter to optimize table storage** +The `CREATE TABLE` statement now supports the `INDEX_GRANULARITY` storage parameter, allowing users to configure internal tablet range sizes for better performance based on query patterns. + +### [](#bug-fixes-1)Bug Fixes + +**Fixed permission conflicts on public schemas across multiple databases** +Resolved an issue where granting identical permissions on public schemas in different databases caused conflicts. This fix ensures correct permission application for improved database security. + +## [](#firebolt-release-notes---version-415)Firebolt Release Notes - Version 4.15 + +### [](#new-features-2)New Features + +**Improved `EXPLAIN (STATISTICS)` to include estimated row counts and column distinct counts** + +The [EXPLAIN (STATISTICS)](/sql_reference/commands/queries/explain.html) function now provides estimated row counts and column distinct counts, when available. 
This enhancement offers more detailed insights for analyzing query performance. + +**Added a Tableau connector for the current version of Firebolt** + +[Tableau](https://www.tableau.com/) is a visual analytics platform that empowers users to explore, analyze, and present data through interactive visualizations. The current Firebolt connector in Tableau Exchange supports only an older version of Firebolt. You can now download the latest connector directly from Firebolt and integrate it with [Tableau Desktop](https://www.tableau.com/products/desktop) or [Tableau Server](https://www.tableau.com/products/server). Follow the installation instructions in [Integrate with Tableau](/Guides/integrations/tableau.html) to set up the updated connector. + +**Added a DBeaver connector for the current version of Firebolt** + +[DBeaver](https://dbeaver.io/) is a free, open-source database administration tool that supports multiple database types, provides a graphical interface for managing databases, running queries, and analyzing data. You can now connect to DBeaver using the [Firebolt JDBC driver](https://docs.firebolt.io/Guides/developing-with-firebolt/connecting-with-jdbc.html). Follow the instructions in [Integrate with DBeaver](/Guides/integrations/dbeaver.html) to set up a connection to DBeaver. + +**Added the Firebolt Resource Center to the Firebolt Workspace** + +The [Firebolt Resource Center](https://www.firebolt.io/resources) is now accessible from the **Firebolt Workspace**. Select the Firebolt icon in the bottom-right corner to access resources including the [Get started guide](/Guides/getting-started/), [Knowledge Center](https://www.firebolt.io/knowledge-center), [Documentation](https://docs.firebolt.io/), [Release notes](/Reference/release-notes/release-notes.html), Announcements, and a unified search tool covering all Firebolt resources. 
+ +### [](#performance-improvements-2)Performance Improvements + +**Optimized `LEFT JOIN` conversion for better query performance** + +A nested `LEFT JOIN` can now be automatically replaced with a more efficient join when its results are not needed due to filtering in a later step. This optimization occurs when a `LEFT JOIN` removes rows where the right-hand side contains `NULL` values, effectively discarding the extra rows introduced by the earlier `LEFT JOIN`. In such cases, simplifying the join structure improves efficiency without altering query results. This conversion reduces unnecessary operations, lowering computational overhead and enhancing performance. + +**Improved performance by allowing multiple `INSERT INTO VALUES ...` statements to be combined in a single request** + +Workloads that send multiple consecutive `INSERT INTO VALUES ...` statements into the same table can now run much faster by sending all statements in a single request separated by semicolons. These statements are now automatically merged and processed together on the server within a single transaction, which means that either all of them succeed or fail. This improvement reduces network overhead and enhances performance for batch data insertion. + +### [](#behavior-changes)Behavior Changes + +**Use `NULL` instead of empty strings for passing unset TVF parameters** + +Table-valued functions (TVFs) such as [LIST\_OBJECTS](/sql_reference/functions-reference/table-valued/list-objects.html), [READ\_PARQUET](/sql_reference/functions-reference/table-valued/read_parquet.html), and [READ\_CSV](/sql_reference/functions-reference/table-valued/read_csv.html) that accept string named parameters like `aws_access_key_id` and `aws_role_arn` will no longer treat empty strings (`''`) as unset arguments. The empty strings will instead be forwarded to the credential provider and may return errors. If you want to pass an explicitly unset parameter, use `NULL` instead. 
+ +### [](#bug-fixes-2)Bug Fixes + +**Resolved issue in distributed `GROUP BY` and `JOIN` planning** + +Resolved a bug in the optimization process for distributed `GROUP BY` and `JOIN` operators. This bug sometimes led to missed optimization opportunities and, in rare cases, incorrect results. + +**Fixed a bug in correlated `EXISTS` subqueries that caused duplicated outer tuples in query results** + +Fixed a bug with non-trivial correlated `EXISTS` subquery, which is a dependent subquery inside an `EXISTS` condition that references a column from an outer query. An example of this kind of query follows: + +``` +SELECT *, + EXISTS(SELECT 1 FROM table2 where COALESCE(table2.col_1, table2.col_2) = table1.col_1) +FROM table1 +``` + +Previously, if an outer table contained a value, and the inner table had two matching values, the outer table’s row would appear twice in the final result instead of just once. This happened because the query checked for matches individually for each row in the inner table, rather than treating the condition as a simple existence check. + +This bug fix corrected this issue by ensuring that the `EXISTS` condition only determines whether at least one match exists, without duplicating rows in the outer table. Now, each row in the outer table correctly appears once, with `TRUE` if a match exists and `FALSE` otherwise, improving the accuracy of query results. + +## [](#firebolt-release-notes---version-414)Firebolt Release Notes - Version 4.14 + +### [](#new-features-3)New Features + +**Added `E2E_DURATION_US` to include total query time in Firebolt infrastructure for enhanced performance monitoring and optimization** + +Added a new column `E2E_DURATION_US` in the system tables `INFORMATION_SCHEMA.ENGINE_RUNNING_QUERIES`, `INFORMATION_SCHEMA.ENGINE_QUERY_HISTORY`, and `INFORMATION_SCHEMA.ENGINE_USER_QUERY_HISTORY` which shows the total time a query has spent within the Firebolt infrastructure. 
In contrast, `DURATION_US` measures only the time spent using the engine without considering retries or routing. The `E2E_DURATION_US` metric measures the total time a query takes from initiation to final result delivery, and includes all sub-components of latency such as routing, preparation, queuing, compilation, retries, and runtimes. For example, if a query starts a stopped engine, the engine’s startup time is included in the query’s end-to-end duration. This update provides a more accurate representation of total query latency, for performance monitoring and optimization. + +**Unhid `scanned_storage_bytes` and `scanned_cache_bytes` from information schema views** + +Unhid `scanned_storage_bytes` and `scanned_cache_bytes` columns from `information_schema.engine_query_history` and `information_schema.engine_user_query_history` views. These columns were previously accessible when explicitly used in a `SELECT` clause, but will now appear by default when you use `SELECT *`. + +### [](#performance-improvements-3)Performance Improvements + +**Enhanced data ingestion performance for `GEOGRAPHY` objects of type `POINT`** + +Improved data loading performance for `GEOGRAPHY` objects of type `POINT`, enabling up to four times faster loading of geographical point data for more efficient data integration and analysis. + +**Improved file listing times for large external scans** + +In operations that read data from Amazon S3 buckets such as external table scans or `COPY FROM` queries, Firebolt lists files in a URL to an Amazon S3 bucket. This process is constrained by the AWS API, which limits file listing to 1,000 files per request. Firebolt has increased the number of concurrent operations so that listing a large number of files is up to 3.5 times faster. 
+ +**Added result cache support for cross and complex joins for improved performance** + +The [query result cache](/Reference/system-settings.html#result-cache) now supports queries using cross joins or complex joins with `OR` conditions and inequalities. This change reduces redundant calculations, improving query performance. + +### [](#bug-fixes-3)Bug Fixes + +**`USAGE` permissions are now required to access `INFORMATION_SCHEMA` views** + +Accessing `INFORMATION_SCHEMA` views now requires `USAGE` permissions on the database. Queries to `INFORMATION_SCHEMA` will fail if these permissions are missing, ensuring consistent enforcement across permission-restricted queries. Ensure that your database has the necessary permissions to prevent access issues. + +**Improved `EXPLAIN` command accuracy for default values of `DATE`, `TIMESTAMP`, and `TIMESTAMPTZ` columns** + +The `EXPLAIN` command now displays default values for columns of type `DATE`, `TIMESTAMP`, and `TIMESTAMPTZ` columns. This update fixes a bug that previously caused default values to be shown incompletely, improving clarity and accuracy in query plan analysis. + +**Resolved filtering issue for views in `information_schema.tables` to enforce user permissions** + +Fixed a bug in `information_schema.tables` which previously listed views that users were not authorized to access. Even though querying these views would fail, users could still see that they existed. Now `information_schema.tables` only lists views that users are allowed to access. + +## [](#firebolt-release-notes---version-413)Firebolt Release Notes - Version 4.13 + +### [](#new-features-4)New Features + +**`GRANT ALL ON ACCOUNT` and `REVOKE ALL ON ACCOUNT` statements for role-based privileges** +The statements `GRANT ALL ON ACCOUNT account_name TO role_name` and `REVOKE ALL ON ACCOUNT account_name FROM role_name` are now supported. They grant or revoke all account-related privileges to the specified role `role_name`. 
+ +**Support for nested arrays in Parquet files** +You can now ingest Parquet files containing nested array structures at any depth. For example: `array(array(array(string)))`. + +### [](#behavior-changes-1)Behavior Changes + +**Removed secured objects from `information_schema` views** +Users can now only access information about objects for which they have the appropriate permissions or ownership for in [information\_schema views](/sql_reference/information-schema/views.html). + +### [](#bug-fixes-4)Bug Fixes + +**`@` character support restored in usernames** +The usage of character `@` is allowed in usernames again, which was previously restricted. The following statements are now valid and will not cause errors: + +``` +CREATE USER "user@example.com"; +ALTER USER user_name RENAME TO "user@example.com"; +``` + +**Resolved memory overuse during CSV import** +Resolved a memory overconsumption problem that occurred when importing CSV files into existing tables. + +**Resolved `EXPLAIN VACUUM` and `EXPLAIN` to improve error handling and result accuracy** +The following behavior of `EXPLAIN VACUUM` has been updated: + +1. If a table is fully vacuumed, no further actions are performed, and the message “Table is fully vacuumed, no vacuum jobs were executed” is returned to the user. +2. The `EXPLAIN VACUUM` output no longer returns an empty result when the vacuumed object is an aggregating index. +3. `EXPLAIN` has been updated to show an error if the specified relation does not exist. + +**Fixed incorrect evaluation of `IS NULL` in outer joins** +Fixed an issue where `IS NULL` predicates on non-nullable columns from the non-preserving side of an outer join were incorrectly reduced to `FALSE` during common table expression (CTE) optimization. When the optimizer attempted to fuse multiple CTEs, it mistakenly replaced `t2.x IS NULL` with `FALSE`, altering query semantics and producing incorrect results. 
This occurred because `t2.x`, though defined as non-nullable, became nullable when used in a left join. The fix ensures that `IS NULL` predicates are correctly preserved during optimization. + +## [](#firebolt-release-notes---version-412)Firebolt Release Notes - Version 4.12 + +### [](#new-features-5)New Features + +**Added `ST_S2CELLIDFROMPOINT` to retrieve the [S2 Cell ID](http://s2geometry.io/devguide/s2cell_hierarchy) of a `GEOGRAPHY` Point** + +You can now use [ST\_S2CELLIDFROMPOINT](/sql_reference/functions-reference/geospatial/st_s2cellidfrompoint.html) to retrieve the S2 cell ID, which identifies the region on Earth that fully contains, or covers, a single Point `GEOGRAPHY` object. You can also specify a cell resolution level. + +**Added keyboard shortcuts to the Firebolt Develop Space** + +The Firebolt **Develop Space** user interface added the following Windows/Mac [keyboard shortcuts](/Guides/query-data/using-the-develop-workspace.html#keyboard-shortcuts-for-the-develop-space): + +- Ctrl + Alt + E / Ctrl + ⌘ + E – Toggle expanding or collapsing query results. +- Ctrl + Alt + N / Ctrl + ⌘ + N – Create a new script. +- Ctrl + Alt + [ / Ctrl + ⌘ + [ – Jump to the previous script. +- Ctrl + Alt + ] / Ctrl + ⌘ + ] – Jump to the next script. + +**Introduced the `INFORMATION_SCHEMA.ROUTINES` view for built-in functions and operators** + +Added the [INFORMATION\_SCHEMA.ROUTINES](/sql_reference/information-schema/routines.html) view to return information about all of Firebolt’s built-in functions and operators including their database, schema, name, type, return data type, parameter data types, and whether they are deterministic. + +**Added support for the `GEOGRAPHY` data type in external tables using CSV and JSON formats** + +Firebolt can now read columns of type `GEOGRAPHY` from external tables in CSV or JSON format, which allows the querying of geospatial data including Points and Polygons. 
+ +**Added a new `MONITOR USAGE` privilege** + +You can use the `MONITOR USAGE` privilege to view all queries running on an engine using [information\_schema.engine\_query\_history](/sql_reference/information-schema/engine-query-history.html) or [information\_schema.engine\_running\_queries](/sql_reference/information-schema/engine-running-queries.html) views. + +**Introduced support for network policy `ADD`/`REMOVE` commands** +Admins can now append or remove specific IP addresses in `ALLOW` or `BLOCK` lists without overriding existing values. This update simplifies network policy management when handling large IP lists and reduces the risk of concurrent updates overwriting each other. + +### [](#performance-improvements-4)Performance Improvements + +**Improved performance of the `ST_COVERS`, `ST_CONTAINS`, and `ST_INTERSECTS` functions** + +Optimized the [ST\_COVERS](/sql_reference/functions-reference/geospatial/st_covers.html), [ST\_CONTAINS](/sql_reference/functions-reference/geospatial/st_contains.html), and [ST\_INTERSECTS](/sql_reference/functions-reference/geospatial/st_intersects.html) functions to improve performance when processing LineStrings and Points with non-intersecting inputs, and Polygons with inputs that do not intersect their boundaries. + +**Improved performance of the `REGEXP_LIKE_ANY` function** + +The [REGEXP\_LIKE\_ANY](/sql_reference/functions-reference/string/regexp-like-any.html) function now performs more efficiently when matching against multiple patterns by compiling a single combined [RE2](https://github.com/google/re2/) regular expression object instead of evaluating each pattern separately. 
+ +### [](#behavior-changes-2)Behavior Changes + +**Updated user name rules to improve consistency and validation** + +The following changes affect the use of user names in [CREATE USER](/sql_reference/commands/access-control/create-user.html) and [ALTER USER](/sql_reference/commands/access-control/alter-user.html): + +- The `@` character is no longer allowed in user names. +- The range of permissible characters in user names is expanded. For more information, see [CREATE USER](/sql_reference/commands/access-control/create-user.html). +- When renaming a user with [ALTER USER](/sql_reference/commands/access-control/alter-user.html) `old_name RENAME TO new_name`, the `new_name` must now comply with the updated user name rules. +- Any new names created with [CREATE USER](/sql_reference/commands/access-control/create-user.html) must now comply with the updated user name rules. + +### [](#bug-fixes-5)Bug Fixes + +**Fixed an error where `APACHE_DATASKETCHES_HLL_ESTIMATE` failed for `NULL` inputs** + +Resolved an error in the [APACHE\_DATASKETCHES\_HLL\_ESTIMATE](/sql_reference/functions-reference/datasketches/apache-datasketches-hll-estimate.html) function that occurred if any of its input values were `NULL`. The function can now process `NULL` inputs. + +**Resolved issue that allowed account lockout on last login** + +Fixed an issue where the `ALTER USER SET LOGIN/SERVICE_ACCOUNT=...` statement could lock out the only active login in an account, rendering the account inaccessible. The operation now fails with an explicit error message in such cases. + +**Fixed incorrect ownership modification for `information_schema`** + +The statement `ALTER SCHEMA information_schema SET OWNER owner_name;` previously succeeded, which was incorrect, because `information_schema` cannot be modified. The operation now fails with an explicit error message. 
+ +**Fixed an out-of-memory error during large CSV imports** + +Updated the ingestion pipeline for [COPY FROM](/sql_reference/commands/data-management/copy-from.html) to ensure that large CSV files without a predefined schema can load into new tables without causing memory errors. This error did not affect external tables. + +**Prevent running queries when using a dropped database** + +When the current database does not exist, such as when it has been dropped, most queries fail as expected. We fixed a bug where some queries against specific `information_schema` views, such as `engines`, `catalogs`, `applicable_roles`, would still succeed in such cases. These queries now fail consistently, like all other queries against a non-existent database. For example, running `SELECT * FROM information_schema.engines` when the database is dropped previously worked, but now fails. + +## [](#firebolt-release-notes---version-411)Firebolt Release Notes - Version 4.11 + +### [](#new-features-6)New Features + +**Introduced the `GEOGRAPHY` data type and functions for geospatial data handling \[public preview]** + +Added a new [GEOGRAPHY](/sql_reference/geography-data-type.html) data type and functions for working with geospatial data. Firebolt supports the three industry standard formats [Well-Known Text (WKT)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry), [Well-Known Binary (WKB)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary), and [GeoJSON](https://datatracker.ietf.org/doc/html/rfc7946) for geospatial data. + +This public preview release includes the following functions: + +- [ST\_ASBINARY](/sql_reference/functions-reference/geospatial/st_asbinary.html) – Converts shapes of the `GEOGRAPHY` data type to the [Well-Known Binary (WKB)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary) format for geographic objects. 
+- [ST\_ASEWKB](/sql_reference/functions-reference/geospatial/st_asewkb.html) – Converts shapes of the `GEOGRAPHY` data type to the [extended Well-Known Binary (EWKB)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Format_variations) format using Spatial Reference Identifier (SRID) 4326, which corresponds to the [WGS84](https://en.wikipedia.org/wiki/World_Geodetic_System#WGS_84) coordinate system. +- [ST\_ASGEOJSON](/sql_reference/functions-reference/geospatial/st_asgeojson.html) – Converts shapes of the `GEOGRAPHY` data type to the [GeoJSON](https://datatracker.ietf.org/doc/html/rfc7946) format. +- [ST\_ASTEXT](/sql_reference/functions-reference/geospatial/st_astext.html) – Converts shapes of the `GEOGRAPHY` data type to the [Well-Known Text (WKT)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format. +- [ST\_CONTAINS](/sql_reference/functions-reference/geospatial/st_contains.html) – Determines if one `GEOGRAPHY` object fully contains another. +- [ST\_COVERS](/sql_reference/functions-reference/geospatial/st_covers.html) – Determines if one `GEOGRAPHY` object fully encompasses another. +- [ST\_DISTANCE](/sql_reference/functions-reference/geospatial/st_distance.html) – Calculates the shortest distance, measured as a geodesic arc between two `GEOGRAPHY` objects, measured in meters. +- [ST\_GEOGFROMGEOJSON](/sql_reference/functions-reference/geospatial/st_geogfromgeojson.html) – Constructs a `GEOGRAPHY` object from a [GeoJSON](https://datatracker.ietf.org/doc/html/rfc7946) string. +- [ST\_GEOGFROMTEXT](/sql_reference/functions-reference/geospatial/st_geogfromtext.html) – Constructs a `GEOGRAPHY` object from a [Well-Known Text (WKT)](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) string. 
+- [ST\_GEOGFROMWKB](/sql_reference/functions-reference/geospatial/st_geogfromwkb.html) – Constructs a `GEOGRAPHY` object from a [Well-Known Binary](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary) (WKB) byte string. +- [ST\_GEOGPOINT](/sql_reference/functions-reference/geospatial/st_geogpoint.html) – Constructs a Point in the `GEOGRAPHY` data type created from specified longitude and latitude coordinates. +- [ST\_INTERSECTS](/sql_reference/functions-reference/geospatial/st_intersects.html) – Determines whether two input `GEOGRAPHY` objects intersect each other. +- [ST\_X](/sql_reference/functions-reference/geospatial/st_x.html) – Extracts the longitude coordinate of a `GEOGRAPHY` Point. +- [ST\_Y](/sql_reference/functions-reference/geospatial/st_y.html) – Extracts the latitude coordinate of a `GEOGRAPHY` Point. + +**Added keyboard shortcuts to the Firebolt Develop Space** + +The user interface in the Firebolt **Develop Space** added the following [keyboard shortcuts](/Guides/query-data/using-the-develop-workspace.html#keyboard-shortcuts-for-the-develop-space): + +- Cmd + Enter – Runs the current query. +- Cmd+Shift+Enter – Runs all queries in a script. + +**Added the window function `FIRST_VALUE`** + +Added a new [FIRST\_VALUE](/sql_reference/functions-reference/window/first-value.html) window function that returns the first value evaluated in a specified window frame. + +## [](#firebolt-release-notes---version-410)Firebolt Release Notes - Version 4.10 + +### [](#new-features-7)New Features + +**Added `CREATE TABLE CLONE` to clone an existing table in a database** + +You can create a clone of an existing table in a database using `CREATE TABLE CLONE`, which is extremely fast because it copies the table structure and references without duplicating the data. The clone functions independently of the original table. Any changes to the data or schema of either table will not affect the other. 
+ +**Added 3-part identifier support for specifying databases in queries** + +You can now reference a database other than the current one in queries by using 3-part identifiers, which specify the database, schema, and object. For example, even if you previously selected a database `db` by using `USE DATABASE db`, you can still query a different database by using a query such as `SELECT * FROM other_db.public.t`. The limitation still exists that every query only addresses a single database. + +**Added `ALTER TABLE ADD COLUMN` to add a column to an existing table** + +You can now use `ALTER TABLE ADD COLUMN` to add columns to Firebolt-managed tables. This functionality is temporarily limited to tables that were created on Firebolt version 4.10 or higher. + +**Added support for `ALTER TABLE RENAME` command** You can use `ALTER TABLE RENAME` to change the name of Firebolt-managed tables. This functionality is temporarily limited to tables created on Firebolt version 4.10 or higher. + +**Added support for external file access using AWS session tokens** + +You can now use an `AWS_SESSION_TOKEN` with access keys to securely authenticate and access external files on AWS with the following features: + +- The [COPY TO](/sql_reference/commands/data-management/copy-to.html) and [COPY FROM](/sql_reference/commands/data-management/copy-from.html) commands. +- [External tables](/Guides/loading-data/working-with-external-tables.html) located in an Amazon S3 bucket. +- The following table-valued functions: `read_parquet`, `read_csv`, and `list_objects`. + +### [](#behavior-changes-3)Behavior Changes + +**Enhanced PostgreSQL compliance for casting data types from text to float** + +Cast from text to floating-point types is now compliant with PostgreSQL with the following improvements: + +1. **The correct parsing of positive floats** – A plus sign (`+`) preceding a float is now handled correctly. Example: `'+3.4'`. +2. 
**Exponent-only input** – Float values starting with an exponent `'e'` or `'E'` are rejected. Example: `'E4'`. +3. **Incomplete exponents** – Float values ending with an exponent without a subsequent exponent value are rejected. Example: `'4e+'`. + +**Account-level rate limits implemented for the system engine** + +Firebolt has implemented account-level rate limits to ensure equitable resource usage among all users of the system engine. When these limits are exceeded, requests will be rejected with the following error message: `429: Account system engine resources usage limit exceeded`. This rate limit targets accounts with exceptionally high resource consumption. Accounts with typical resource usage should not be affected and require no further action. + +### [](#bug-fixes-6)Bug Fixes + +**Corrected runtime reporting** + +Resolved an issue where the runtime displayed in Firebolt’s user interface and JSON responses omitted including processing times for some query steps. + +**Resolved “Invalid Input Aggregate State Type” error with aggregating indexes** + +Fixed an issue where the “invalid input aggregate state type” error could occur when queries read from aggregating indexes that defined a `COUNT(*)` aggregate function before other aggregate functions. After this fix, such aggregating indexes can now be queried correctly without needing to be rebuilt. + +**Fixed a rare bug in subresult caching logic** + +Addressed a rare issue in the logic for caching and reusing subresults that could cause query failures with specific query patterns. This issue did not impact the correctness of query results. + +**Resolved issue preventing schema owners from granting “ANY” privileges** + +Fixed an issue where schema owners were unable to grant “ANY” privileges on their schema to other users. +For example: + +``` +GRANT SELECT ANY ON SCHEMA public TO ... 
+``` + +Schema owners can now execute this command which allows the specified user or role to perform SELECT operations on any table. + +## [](#firebolt-release-notes---version-49)Firebolt Release Notes - Version 4.9 + +### [](#new-features-8)New Features + +**Added the `enable_result_cache` setting for controlling query result caching during benchmarking** + +You can set `enable_result_cache` to `FALSE` to disable the use of Firebolt’s result cache, which is set to `TRUE` by default. Disabling result caching can be useful for benchmarking query performance. When `enable_result_cache` is disabled, resubmitting the same query will recompute the results rather than retrieving them from cache. For more information, see [Result Cache](/Reference/system-settings.html#result-cache). + +**Added `LAG` and `LEAD` support for negative offsets.** + +The second parameter in both [LAG](/sql_reference/functions-reference/window/lag.html) and [LEAD](/sql_reference/functions-reference/window/lead.html) can now accept negative numbers. Given a negative number, a `LAG` will become a `LEAD` and vice versa. For example, `LAG(x,-5,3)` is the same as `LEAD(x,5,3)`. + +### [](#performance-improvements-5)Performance Improvements + +**Faster string searches for case-insensitive simple regular expressions in `REGEXP_LIKE`** + +Simple regular expressions in [REGEXP\_LIKE](/sql_reference/functions-reference/string/regexp-like.html) with case-insensitive matching, using the `i` flag, now use the same optimized string search implementation as [ILIKE](/sql_reference/functions-reference/string/ilike.html), achieving up to three times faster runtimes in observed cases. + +### [](#bug-fixes-7)Bug Fixes + +**Empty character classes in regular expressions** + +Fixed a rare case where empty character classes were mistakenly interpreted as valid character classes instead of being treated as raw characters. 
In cases like `[]a]`, the expression is now correctly interpreted as a pattern that matches any single character from the list `]a`, rather than treating `[]` as an empty character class followed by `a]`. + +**Trailing backslash in regular expressions** + +Fixed a rare case where invalid regular expressions with a trailing backslash `\` were accepted. + +## [](#firebolt-release-notes---version-48)Firebolt Release Notes - Version 4.8 + +### [](#new-features-9)New Features + +**Introduced new bitwise shift functions `BIT_SHIFT_RIGHT` and `BIT_SHIFT_LEFT`** + +The following bitwise shift functions are now supported: + +- `BIT_SHIFT_RIGHT` shifts the bits of a number to the right by a specified number of positions, which effectively divides the number by `2` for each position shifted. +- `BIT_SHIFT_LEFT` shifts the bits of a number to the left by a specified number of positions, which effectively multiplies the number by `2` for each position shifted. + +**Introduced new trigonometric functions `ACOS`, `ATAN`, `ASIN`, `COS`, `COT`, `TAN`, `DEGREES`, and `PI`** + +The following trigonometric functions are now supported: + +- `ACOS` calculates the arccosine of a value in radians. +- `ATAN` calculates the arctangent of a value in radians. +- `ASIN` calculates the arcsine of a value in radians. +- `COS` calculates the cosine of a value in radians. +- `COT` calculates the cotangent of a value in radians. +- `TAN` calculates the tangent of a value in radians. +- `DEGREES` converts a value in radians to degrees. +- `PI` returns π as a value of type `DOUBLE PRECISION`. + +**Introduced the `timezone` query-level setting with `time_zone` as an alias** + +Added the `timezone` query-level setting. The previous `time_zone` query setting still works, and is now an alias for `timezone`. 
+ +**Introduced new `PERCENTILE_CONT` and `MEDIAN` aggregate functions** + +Added the following aggregate functions: + +- `PERCENTILE_CONT` calculates a specified percentile of values in an ordered dataset. +- `MEDIAN` returns the median of a given column. It is equivalent to `PERCENTILE_CONT(0.5)`: half the values in the column are smaller, and half are bigger than the returned value. If the number of values in the column is even, `MEDIAN` returns the arithmetic mean of the two middle values. + +**Added support to meet HIPAA regulations for health information** + +Added [support to meet federal HIPAA regulations](/Overview/Security/security.html#hipaa-compliance) to ensure the confidentiality, integrity, and availability of electronic protected health information within the Firebolt platform. + +### [](#performance-improvements-6)Performance Improvements + +**Improved expression comparison logic within queries** + +Improved expression comparison logic to better recognize identical expressions within queries. This enhancement supports a broader range of queries and boosts the overall quality of query plans. + +**Improving cold reads by reducing the amount of Amazon S3 requests needed to load data** + +Improved the performance of cold reads by minimizing the number of Amazon S3 requests required to load data. In the case of tiny tablets, this improvement led to a 50% improvement in performance. + +### [](#bug-fixes-8)Bug Fixes + +**Fixed a bug preventing view creation with type conversions to array types** + +Fixed an issue that prevented users from creating database views that involve type conversion to array types. + +## [](#db-version-47)DB version 4.7 + +### [](#new-features-10)New Features + +**Added Snappy compression support to the COPY TO command for PARQUET output format** +You can now apply Snappy compression, which is faster than GZIP, when using `COPY TO` with `TYPE=PARQUET`. Specify `COMPRESSION=SNAPPY` within `COPY TO` to enable this. 
+ +**Added `information_schema.engine_user_query_history` view to log only user-initiated queries** +Added a new query history view, `information_schema.engine_user_query_history`, which shows all queries initiated by users. This view filters information from `information_schema.engine_query_history` view, which logs all engine queries including system-generated ones like UI updates and page-load requests. + +**Added support for `information_schema.enabled_roles`** +Added a new view `information_schema.enabled_roles` which lists the roles available in the account. + +**Added a system setting `enable_subresult_cache` for controlling subresult reuse** +A new system setting `enable_subresult_cache` allows users to enable or disable caching of query subresults for subsequent reuse. Caching remains enabled by default. This setting allows users to temporarily disable caching, e.g. for benchmarking purposes. + +**Added “FROM first” syntax allowing the `FROM` clause to precede the `SELECT` clause** +Added support for the “FROM first” syntax, which allows placing the `FROM` clause before the `SELECT` clause, for example `FROM t SELECT a, SUM(b) GROUP BY a`. You can now also omit the `SELECT` clause, as in `FROM t`. + +**Introduced a new function `GEN_RANDOM_UUID_TEXT` to generate a universally unique identifier (UUID)** +The new function `GEN_RANDOM_UUID_TEXT` accepts no arguments and returns a version `4` UUID as defined by [RFC-4122](https://tools.ietf.org/html/rfc4122#section-4.4) as a `TEXT` value. + +**Introduced `~` and `!~` operators as aliases for `REGEXP_LIKE` and `NOT REGEXP_LIKE`** +Added the `~` operator as an alias for `REGEXP_LIKE`, and the `!~` operator, which serves as an alias for `NOT REGEXP_LIKE`. 
+ +**Introduced JSON functions `JSON_POINTER_EXTRACT_KEYS`, `JSON_POINTER_EXTRACT_VALUES`, `JSON_POINTER_EXTRACT_TEXT`** +The following new JSON functions are now supported: + +- `JSON_POINTER_EXTRACT_KEYS` extracts keys from a JSON object +- `JSON_POINTER_EXTRACT_VALUES` extracts values from a JSON object +- `JSON_POINTER_EXTRACT_TEXT` extracts the JSON string value as SQL TEXT + +**Introduced trigonometric functions `RADIANS`, `SIN`, `ATAN2`** +The following trigonometric functions are now supported: + +- `RADIANS` to convert degrees into radians +- `SIN` to compute the sine in radians +- `ATAN2` to calculate the arctangent with two arguments. `ATAN2(y,x)` is the angle between the positive x-axis and the line from the origin to the point `(x,y)`, expressed in radians. + +**Introduced new functions to calculate standard deviation and variance for both samples and populations** +New functions that accept `REAL` and `DOUBLE` inputs and return standard deviations and variances: + +- `STDDEV_SAMP` - Returns the sample standard deviation of all non-`NULL` numeric values produced by an expression, which measures how spread out values are in a sample. +- `STDDEV_POP` - Returns the population standard deviation of all non-`NULL` numeric values produced by an expression, which measures how spread out values are in an entire population. +- `VAR_SAMP` - Returns the sample variance of all non-`NULL` numeric values produced by an expression, which measures the average of the squared differences from the sample mean, indicating how spread out the values are within a sample. +- `VAR_POP` - Returns the population variance of all non-`NULL` numeric values produced by an expression. The population variance measures the average of the squared differences from the population mean, indicating how spread out the values are within the entire population. 
+ +**Introduced new array functions `ARRAY_ALL_MATCH` and `ARRAY_ANY_MATCH`** +The new functions `ARRAY_ALL_MATCH` and `ARRAY_ANY_MATCH` accept an (optional) lambda function and an array and return `TRUE` if all elements (`ARRAY_ALL_MATCH`) or any element (`ARRAY_ANY_MATCH`) satisfy the lambda condition, and `FALSE` otherwise. When no lambda is passed, the array has to be of type `BOOLEAN`, and the identity lambda `x -> x` is used. + +### [](#performance-improvements-7)Performance Improvements + +**Improved performance of `JSON_EXTRACT`, `JSON_EXTRACT_ARRAY`, and `JSON_VALUE` functions** +Enhanced the performance of the `JSON_EXTRACT`, `JSON_EXTRACT_ARRAY`, and `JSON_VALUE` functions. + +### [](#behavior-changes-4)Behavior Changes + +**Updated sorting method for array columns with `NULL` values to align with PostgreSQL behavior** + +The sorting method for array columns containing `NULL` values has been updated to ensure that `ASC NULLS FIRST` places `NULL` values before arrays, and `DESC NULLS LAST` places `NULL` values after arrays, which aligns with PostgreSQL behavior. + +The following code example creates a temporary table `tbl` which contains three rows: a `NULL` array, an array with the value `1`, and an array with a `NULL` element. Then, a `SELECT` statement sorts all rows in ascending order: + +``` +WITH tbl(i) AS ( + SELECT NULL::INT[] + UNION ALL + SELECT ARRAY[1]::INT[] + UNION ALL + SELECT ARRAY[NULL]::INT[] +) +SELECT * FROM tbl ORDER BY i ASC NULLS FIRST; +``` + +The query previously returned `{NULL}, {1}, NULL`, but now returns `NULL, {1}, {NULL}`. + +`NULLS FIRST` and `NULLS LAST` apply to the array itself, not to its elements. By default, ascending order (`ASC`) assumes `NULLS LAST`, while descending order (`DESC`) assumes `NULLS FIRST` when sorting arrays. + +**Allowed use of the SESSION\_USER function without parentheses** + +The `SESSION_USER` function can now be used without parentheses, like this: `SELECT SESSION_USER`. 
As a result, any column named `session_user` now needs to be enclosed in double quotes as follows: `SELECT 1 AS "session_user"` or `SELECT "session_user" FROM table`. + +### [](#bug-fixes-9)Bug Fixes + +**Corrected JSON output format to display NaN values consistently as `nan`** +The JSON output format previously showed some NaN values as `-nan`. This was corrected to consistently display NaN values as `nan` in the JSON output. + +**Resolved an issue with `CHECKSUM` and `HASH_AGG` failing when combining literals and table columns** +Fixed an issue where the `CHECKSUM` and `HASH_AGG` functions failed when used with a combination of literals and table columns. + +**Fixed a rare inaccuracy that could cause incorrect results on multi-node engines when performing certain `UNION ALL` operations** +Fixed a rare inaccuracy when performing certain `UNION ALL` operations on subqueries that are the result of aggregations or joins on overlapping but distinct keys, followed by an aggregation or join on the common keys of the subqueries’ aggregations or joins. + +**Fixed a rare inaccuracy that could cause incorrect results with CTEs using `RANDOM()` in specific join scenarios** +Fixed a rare inaccuracy that caused incorrect results when a common table expression using the `RANDOM()` function was used multiple times, and at least one of these uses was on the probe side of a join involving a primary index key of the underlying table. + +## [](#db-version-46)DB version 4.6 + +**September 2024** + +### [](#new-features-11)New Features + +**`COPY TO` support for the `SNAPPY` compression type** + +[COPY TO](/sql_reference/commands/data-management/copy-to.html) now supports `SNAPPY` as a new compression option for Parquet files. This enhancement offers greater flexibility for managing file size and performance, particularly for workloads requiring faster compression. Each file is written in Parquet format, with the specified compression applied to the data pages in the column chunks. 
+ +**`COPY FROM` support for filtering by source file metadata** + +[COPY FROM](/sql_reference/commands/data-management/copy-from.html) now supports filtering by source file metadata using the `WHERE` clause. + +**Added support for vector distance calculations with new functions** + +Firebolt has added support for vector distance and similarity calculations with the following new functions: [VECTOR\_COSINE\_DISTANCE](/sql_reference/functions-reference/vector/vector-cosine-distance.html), [VECTOR\_MANHATTAN\_DISTANCE](/sql_reference/functions-reference/vector/vector-manhattan-distance.html), [VECTOR\_EUCLIDEAN\_DISTANCE](/sql_reference/functions-reference/vector/vector-euclidean-distance.html), [VECTOR\_SQUARED\_EUCLIDEAN\_DISTANCE](/sql_reference/functions-reference/vector/vector-squared-euclidean-distance.html), [VECTOR\_COSINE\_SIMILARITY](/sql_reference/functions-reference/vector/vector-cosine-similarity.html), and [VECTOR\_INNER\_PRODUCT](/sql_reference/functions-reference/vector/vector-inner-product.html). + +### [](#behavior-changes-5)Behavior Changes + +**Introduced `SHOW CATALOGS` statement and aliased `SHOW DATABASES` to it while deprecating `SHOW DATABASE X`** + +A new statement `SHOW CATALOGS` now acts as an alias for `SHOW DATABASES`. The statement `SHOW DATABASE X` is no longer supported. + +**`COPY FROM` now unzips Parquet files with gzip extensions** + +Before version 4.6, the `COPY FROM` command did not apply file-level decompression to Parquet files with a `.gzip` or `.gz` extension. The command treated these files as standard Parquet files, assuming that any compression existed only within the internal Parquet format structure. + +With the release of version 4.6, `COPY FROM` now processes Parquet files similarly to other formats. When a Parquet file has a `.gz` or `.gzip` extension, the command will first decompress the file before reading it as a Parquet format file. 
Hence, it will now fail while reading internally compressed Parquet files with gzip extensions. Users experiencing issues with loading files after this change should contact the support team at support@firebolt.io for assistance. + +### [](#bug-fixes-10)Bug Fixes + +**Fixed a rare bug that caused some query failures from incorrect computation of cacheable subresults** + +Fixed a rare bug impacting the logic that determined which subresults could be cached and reused. This issue could have caused query failures in certain patterns, but it did not impact the accuracy of the query outcomes. + +**Updated name of aggregatefunction2 to aggregatefunction in explain output** + +The name `aggregatefunction2` has been updated to `aggregatefunction` in the [EXPLAIN](/sql_reference/commands/queries/explain.html) output. + +**Fixed incorrect results in `ARRAY_AGG` expressions by excluding `NULL` values for false conditions in aggregating indexes** + +Aggregate expressions like `ARRAY_AGG(CASE WHEN THEN ELSE NULL END)` previously returned incorrect results by excluding `NULL` values for rows when the condition was `FALSE`. + +## [](#db-version-45)DB version 4.5 + +**September 2024** + +### [](#new-features-12)New Features + +**Allowed casting from `TEXT` to `DATE` with truncation of timestamp-related fields** Casting from `TEXT` to `DATE` now supports text values containing fields related to timestamps. These fields are accepted, but truncated during conversion to `DATE`. + +The following code example casts the `TEXT` representation of the timestamp `2024-08-07 12:34:56.789` to the `DATE` data type. The conversion truncates the time portion, leaving only the date, as follows: + +Example: + +``` +SELECT '2024-08-07 12:34:56.789'::DATE +``` + +Results in + +``` +DATE `2024-08-07` +``` + +**Added the `CONVERT_FROM` function** + +Added the `CONVERT_FROM` function that converts a `BYTEA` value with a given encoding to a `TEXT` value encoded in UTF-8. 
+ +**Added the BITWISE aggregate functions** + +Added support for the following functions: BIT\_OR (bitwise OR), BIT\_XOR (bitwise exclusive OR), and BIT\_AND (bitwise AND). + +**Added the `REGEXP_LIKE_ANY` function** + +Added the `REGEXP_LIKE_ANY` function that checks whether a given string matches any regular expression pattern from a specified list of patterns. + +### [](#bug-fixes-11)Bug Fixes + +**Updated `created` and `last_altered` column types in `information_schema.views` from `TIMESTAMP` to `TIMESTAMPTZ`** The data types of the `created` and `last_altered` columns in `information_schema.views` have been changed from `TIMESTAMP` to `TIMESTAMPTZ`. + +**Fixed runtime constant handling in the sort operator** Fixed the handling of runtime constants in the sort operator. Now, the sort operator can be correctly combined with `GENERATE_SERIES`. For example, the query `SELECT x, GENERATE_SERIES(1,7,3) FROM GENERATE_SERIES(1,3) t(x)` now correctly displays values `1` to `3` in the first column, instead of just `1`. + +## [](#db-version-44)DB version 4.4 + +**August 2024** + +### [](#new-features-13)New Features + +**Extended support for date arithmetic** + +Now you can subtract two dates to get the number of elapsed days. For example, `DATE '2023-03-03' - DATE '1996-09-03'` produces `9677`. + +**Role-based permissions for COPY FROM and external tables** + +Added support for role-based permissions (ARNs) to the COPY FROM command and external table operations. + +**Added `trust_policy_role` column to `information_schema.accounts` view for S3 access** + +Added a new column `trust_policy_role` to the `information_schema.accounts` view. This column shows the role used by Firebolt to access customer S3 buckets. + +**Enabled selection of external tables’ pseudo columns without adding data columns** + +Users can now select an external table’s pseudo columns (source file name, timestamp, size, and etag) without adding any data columns. 
For example, `select $source_file_timestamp from t_external` returns the file timestamps for each row. The query `select count($source_file_timestamp) from t_external` returns the total number of rows in the external table, similar to `count(*)`. The query `select count(distinct $source_file_name) from t_external` returns the number of distinct objects containing at least one row in the source S3 location. Regarding `count(*)` performance, formats like CSV or JSON still require reading the data fully to determine an external file’s row count. However, Parquet files provide the row count as part of the file header, and this is now used instead of reading the full data. + +**Extended support for arbitrary join conditions, including multiple inequality predicates** + +We now support more join conditions. As long as there is one equality predicate comparing a left column to a right column of the join (not part of an OR expression), the remaining join condition can now be an arbitrary expression. The limitation on the number of inequality predicates was removed. + +**New functions `URL_ENCODE` and `URL_DECODE`** + +We added support for the `URL_ENCODE` and `URL_DECODE` functions. + +**New logarithm functions `ln`, `log`** + +We added support for calculating logarithms. The natural logarithm is available using `ln(val double precision)`. The base 10 logarithm is available using `log(val double precision)`. Logarithms with custom bases are available using `log(base double precision, val double precision)`. + +**New function `SQRT`** + +Added support for the `SQRT` function to compute the square root. + +**New functions `JSON_VALUE`, `JSON_VALUE_ARRAY`, `JSON_EXTRACT_ARRAY`** + +Added support for the functions `JSON_VALUE`, `JSON_VALUE_ARRAY`, and `JSON_EXTRACT_ARRAY`. + +**New function `SESSION_USER`** + +Support has been added for the `SESSION_USER` function, which retrieves the current user name. 
+ +**New columns in `information_schema.engine_query_history`** + +Added two new columns to `information_schema.engine_query_history`: `query_text_normalized_hash` and `query_text_normalized`. + +### [](#breaking-changes)Breaking Changes + +**Reserved the keyword GEOGRAPHY, requiring double quotes for use as an identifier** + +The word GEOGRAPHY is now a reserved keyword and must be quoted using double quotes for use as an identifier. For example, `create table geography(geography int);` will now fail, but `create table "geography" ("geography" int);` will succeed. + +**Deprecated the legacy HTTP ClickHouse headers** + +We no longer accept or return the legacy HTTP ClickHouse header format `X-ClickHouse-*`. + +**Fixed `json_value` zero-byte handling** + +The `json_value` function no longer returns null characters (0x00), as the TEXT datatype does not support them. For example, `select json_value('"\u0000"');` now results in an error. + +**Change default values for NODES and TYPE during CREATE ENGINE** + +When performing a CREATE ENGINE, the default values for NODES and TYPE parameters have changed. NODES defaults to `2` (previously `1`) and TYPE defaults to `M` (previously `S`). To create an engine with the previous default values, run the following command: + +``` +CREATE ENGINE my_engine WITH NODES=1 TYPE=S +``` + +### [](#bug-fixes-12)Bug Fixes + +**Fixed directory structure duplication in the S3 path when using the COPY TO statement with SINGLE\_FILE set to FALSE** + +Fixed an issue in `COPY TO` when `SINGLE_FILE=FALSE`. Previously, the specified directory structure in the location was repeated twice in the S3 path. For example, files were output to “s3://my-bucket/out/path/out/path/” instead of “s3://my-bucket/out/path/”. 
+ +**Fixed the file extension in the S3 path when using the COPY TO statement with GZIP-Parquet format** + +Fixed an issue in `COPY TO` when `TYPE=PARQUET` and `COMPRESSION=GZIP`, which uses the Parquet file format with internal GZIP compression for the columns. Previously, the output files would have the extension “.parquet.gz”. Now, the extension is “.gz.parquet”. + +## [](#db-version-43)DB version 4.3 + +**August 2024** + +### [](#new-features-14)New Features + +**Role-based permissions for COPY FROM and External Table processes** + +Enabled role-based permissions for COPY FROM and External Table processes. + +**HLL-based count distinct functions compatible with the Apache DataSketches library** + +Firebolt now supports count-distinct functions using the HLL (HyperLogLog) algorithm, compatible with the Apache DataSketches library. For details and examples, see documentation on the functions [APACHE\_DATASKETCHES\_HLL\_BUILD](/sql_reference/functions-reference/datasketches/apache-datasketches-hll-build.html), [APACHE\_DATASKETCHES\_HLL\_MERGE](/sql_reference/functions-reference/datasketches/apache-datasketches-hll-merge.html), and [APACHE\_DATASKETCHES\_HLL\_ESTIMATE](/sql_reference/functions-reference/datasketches/apache-datasketches-hll-estimate.html). + +**Supported additional join conditions and removed the restriction on the number of inequality predicates** + +Firebolt has added enhanced support for more join conditions. As long as there is one equality predicate comparing a left column to a right column of the join, which is not part of a disjunctive (OR) expression, the remaining join condition can be arbitrary. The previous limitation on the number of inequality predicates has been removed. + +### [](#performance-improvements-8)Performance Improvements + +**Multi-node query performance** + +Firebolt has improved the performance of data transfer between nodes, resulting in faster overall query execution times. 
+
+**Enhanced Interval Arithmetic Support**
+
+Firebolt has enhanced support for interval arithmetic. You can now use expressions of the form `date_time + INTERVAL * d`, where `date_time` is an expression of type Date, Timestamp, TimestampTz, and `d` is an expression of type DOUBLE PRECISION. The interval is now scaled by `d` before being added to `date_time`. For example, writing `INTERVAL '1 day' * 3` is equivalent to writing `INTERVAL '3 days'`.
+
+**Optimized selective inner and right joins on primary index and partition by columns to reduce rows scanned**
+
+Selective inner and right joins on primary index and partition by columns can now benefit from pruning. This reduces the number of rows scanned by filtering out rows that are not part of the join result early in the process. This optimization works best when joining on the first primary index column or a partition by column. The optimization is applied automatically when applicable, and no action is required. Queries that used this optimization will display “Prune:” labels on the table scan in the EXPLAIN (PHYSICAL) or EXPLAIN (ANALYZE) output.
+
+### [](#bug-fixes-13)Bug Fixes
+
+**Fixed a bug in the combination of cross join and the `index_of` function**
+
+Resolved an issue where the `index_of` function would fail when applied to the result of a cross join that produced a single row.
+
+### [](#breaking-changes-1)Breaking Changes
+
+**Temporarily restricted column DEFAULT expressions in CREATE TABLE statements**
+
+Column DEFAULT expressions in CREATE TABLE statements have been temporarily restricted; they can only consist of literals and the following functions: `CURRENT_DATE()`, `LOCALTIMESTAMP()`, `CURRENT_TIMESTAMP()`, `NOW()`. Existing tables with column DEFAULT expressions are not affected. 
+ +**Underflow detection while casting from TEXT to floating point data types** + +Firebolt now detects underflow, a condition where a numeric value becomes smaller than the minimum limit that a data type can represent, when casting from TEXT to floating point data types. For example, the query `select '10e-70'::float4;` now returns an error, while it previously returned `0.0`. + +**Returning query execution errors in JSON format through the HTTP API** + +Firebolt’s HTTP API now returns query execution errors in JSON format, allowing for future enhancements like including metadata such as error codes, or the location of a failing expression within the SQL script. + +**Changed default of case\_sensitive\_column\_mapping parameter in COPY FROM** + +The default value for the `CASE_SENSITIVE_COLUMN_MAPPING` parameter in `COPY FROM` is now `FALSE`, meaning that if a target table contains column names in uppercase and the source file to ingest has the same columns in lowercase, the ingestion will consider them the same column and ingest the data. + +**`extract` function returns Numeric(38,9) for Epoch, second, and millisecond extraction** + +The result data type of the `extract` function for epoch, second, and millisecond was changed to return the type Numeric(38,9) instead of a narrower Numeric type. For example, `select extract(second from '2024-04-22 07:10:20'::timestamp);` now returns Numeric(38,9) instead of Numeric(8,6). + +## [](#db-version-42)DB version 4.2 + +**July 2024** + +### [](#new-features-15)New features + +**New `ntile` window function** + +Firebolt now supports the `ntile` window function. Refer to our [NTILE](/sql_reference/functions-reference/window/ntile.html) documentation for examples and usage. + +### [](#enhancements-changes-and-new-integrations)Enhancements, changes and new integrations + +**Improved query performance** + +Queries with “`SELECT [project_list] FROM [table] LIMIT [limit]`” on large tables are now significantly faster. 
+ +**Updated table level RBAC** + +Table level RBAC is now supported by Firebolt. This means that RBAC checks also cover schemas, tables, views and aggregating indexes. Refer to our [RBAC](/Guides/security/rbac.html) docs for a detailed overview of this new feature. The new Firebolt version inhibits the following change: + +- System built-in roles are promoted to contain table level RBAC information. This means that new privileges are added to `account_admin`, `system_admin` and `public` roles. The effect is transparent— any user assigned with those roles will not be affected. + +**Removal of Deprecated Columns from `INFORMATION_SCHEMA.ENGINES`** + +We removed the following columns from `INFORMATION_SCHEMA.ENGINES` that were only for FB 1.0 compatibility: `region`, `spec`, `scale`, `warmup`, and `attached_to`. These columns were always empty. (These columns are hidden and do not appear in `SELECT *` queries, but they will still work if referenced explicitly.) + +### [](#breaking-changes-2)Breaking Changes + +**Improved rounding precision for floating point to integer casting** + +Casting from floating point to integers now uses Banker’s Rounding, matching PostgreSQL’s behavior. This means that numbers that are equidistant from the two nearest integers are rounded to the nearest even integer: + +Examples: + +``` +SELECT 0.5::real::int +``` + +This returns 0. + +``` +SELECT 1.5::real::int +``` + +This returns 2. + +Rounding behavior has not changed for numbers that are strictly closer to one integer than to all others. + +**JSON functions update** + +Removed support for `json_extract_raw`, `json_extract_array_raw`, `json_extract_values`, and `json_extract_keys`. Updated `json_extract` function: the third argument is now `path_syntax`, which is a JSON pointer expression. See [JSON\_EXTRACT](/sql_reference/functions-reference/JSON/json-extract.html) for examples and usage. 
+
+**Cluster ordinal update**
+
+Replaced `engine_cluster` with [cluster\_ordinal](/sql_reference/information-schema/engine-metrics-history.html) in `information_schema.engine_metrics_history`. The new column is an integer representing the cluster number.
+
+**Configurable cancellation behavior on connection drop**
+
+Introduced the `cancel_query_on_connection_drop` setting, allowing clients to control query cancellation on HTTP connection drop. Options include `NONE`, `ALL`, and `TYPE_DEPENDENT`. Refer to [system settings](/Reference/system-settings.html#query-cancellation-mode-on-connection-drop) for examples and usage.
+
+**JSON format as default for error output**
+
+The HTTP API now returns query execution errors in JSON format by default. This change allows for the inclusion of meta information such as error codes and the location of failing expressions in SQL scripts.
+
+**STOP ENGINE will drain currently running queries first**
+
+`STOP ENGINE` command now supports graceful drain, meaning any currently running queries will be run to completion. Once all the queries are completed, the engine will be fully stopped and terminated. If you want to stop the engine immediately, you can issue a STOP ENGINE command using the TERMINATE option. For example, to immediately stop an engine, my\_engine, you can use:
+
+```
+ STOP ENGINE my_engine WITH TERMINATE = TRUE
+```
+
+**Scaling engines will not terminate currently running queries**
+
+`ALTER ENGINE` command now supports graceful drain, meaning when you scale an engine (vertically or horizontally), any currently running queries will not be terminated. New queries after the scaling operation will be directed to a new cluster, while queries running on the old cluster will be run to completion.
+
+**Updated RBAC ownership management**
+
+We have introduced several updates to role and privilege management:
+
+- The `security_admin` role will be removed temporarily and re-introduced in a later release. 
+
+- `Information_object_privileges` includes more privileges. Switching to a specific user database (e.g., by executing `use database db`) will only show privileges relevant for that database. Account-level privileges no longer show up when attached to a specific database.
+- Every newly created user is granted the `public` role. This grant can be revoked.
+
+## [](#db-version-41)DB version 4.1
+
+**June 2024**
+
+- [Resolved issues](#resolved-issues)
+
+### [](#resolved-issues)Resolved issues
+
+- Fixed an issue causing errors when using `WHERE column IN (...)` filters on external table scans.
+
+## [](#db-version-40)DB version 4.0
+
+**June 2024**
+
+- [Enhancements, changes, and new integrations](#enhancements-changes-and-new-integrations)
+- [Breaking Changes](#breaking-changes)
+
+### [](#enhancements-changes-and-new-integrations-1)Enhancements, changes and new integrations
+
+**Query Cancellation on HTTP Connection Drop**
+
+Going forward, when the network connection between the client and Firebolt is dropped (for example because the Firebolt UI tab was closed or due to network issues), DML queries (INSERT, UPDATE, DELETE, etc) are no longer canceled automatically, but will keep running in the background. You can continue to monitor their progress in `information_schema.engine_running_queries` or cancel them manually using the `cancel query` statement if desired. DQL queries (SELECT) are still canceled automatically on connection drop.
+
+**New Aggregate Functions: `CHECKSUM` and `hash_agg`**
+
+`CHECKSUM` and `hash_agg` functions are now supported for aggregating indexes. Note that when the `hash_agg` function doesn’t receive rows, the result is 0.
+
+### [](#breaking-changes-3)Breaking Changes
+
+**Array Casting Nullability Update**
+
+Cast to array will no longer support specifying nullability of the inner type. 
Example: + +``` +a::array(int null) +``` + +or + +``` +cast(a as array(int not null)) +``` + +will now fail, and need to be rewritten as: + +``` +a::array(int) +``` + +or + +``` +cast(a as array(int)). +``` + +**Postgres-compliant Cast** + +Casts now behave the same across the product and adhere to the list of supported casts. Some usages of casts (explicit, implicit, or assignment cast) that were previously allowed are no longer supported and now result in errors. For more details on list of supported casts, see the documentation [here](/sql_reference/data-types.html#type-conversion). + +## [](#db-version-334)DB version 3.34 + +**May 2024** + +- [Enhancements, changes, and new integrations](#enhancements-changes-and-new-integrations) +- [Resolved issues](#resolved-issues) + +### [](#enhancements-changes-and-new-integrations-2)Enhancements, changes and new integrations + +**Removed `MATCH` function** + +The `match` function has been removed and replaced with [regexp\_like](/sql_reference/functions-reference/string/regexp-like.html). + +**Producing an error for array function failure instead of NULL** + +Array function queries that accept two or more array arguments now produce an error. If you call an array function such as `array_transform(..)` or `array_sort(..)` with multiple array arguments, the arrays must have the same size. For example: + +``` +array_transform(x, y -> x + y, arr1, arr2) +``` + +This raises an error if `array_length(arr1) != array_length(arr2)`. We now also perform this check for NULL literals. If you previously used `array_transform(x, y -> x + y, NULL::INT[], Array[5, 6])`, you got back `NULL`. Now, the query using that expression will raise an error. + +**Added ARRAY\_FIRST function** + +The [array\_first](../../sql_reference/functions-reference/Lambda/array-first.html) function has been added. It returns the first element in the given array for which the given function returns `true`. 
+ +**New name for `any_match`** + +A new name for `any_match` has been added: [array\_any\_match](/sql_reference/functions-reference/Lambda/array-any-match.html). `any_match` will be kept as an alias. + +**Updated ARRAY\_SUM return types** + +The `array_sum` function of `bigint[]` now returns a numeric value and `array_sum` of `real[]` returns a real value. + +**Precedence of operators** + +Breaking change in operator precedence between comparison operators such as `=`, `<`, `>`, and `IS` operator. New behavior is compatible with Postgres. + +Examples of query that changed behavior: + +``` +select 1 is null = 2 is null +``` + +This used to be `true`, because it was interpreted as `select (1 is null) = (2 is null)`. It now becomes an error of incompatible types in `=` + +``` +select false = false is not null +``` + +The result used to be `false` - `select false = (false is not null)`, but now is `true` - `select (false = false) is not null`. + +**Dropping the role** + +Role cannot be dropped if there are permissions granted to the role. The error message will be displayed if you need to manually drop permissions associated to the role. + +**Coalesce Short-Circuiting** + +`COALESCE` now supports short-circuiting in Firebolt. Queries such as `COALESCE(a, 1 / 0) FROM t` could fail before, even when there were no NULLs in t. Only `CASE WHEN` supported short circuiting. Firebolt is now aligned with PostgreSQL and supports short circuiting in `COALESCE` as well. + +**Create table under I\_S schema** + +You can now execute `CREATE TABLE`/`VIEW`/`AGGREGATING INDEX` only under the public schema. + +**Improved error message for JSON `PARSE_AS_TEXT` format** + +The error message for external tables created with JSON `PARSE_AS_TEXT` format has been revised. This format reads specifically into a *single* column of type either TEXT or `TEXT NOT NULL`. (Note there may be external table partition columns defined after the single TEXT column, and they are okay). 
Now, only the error message regarding the `CREATE EXTERNAL TABLE` statement on a user’s first attempt to use `SELECT` will be seen. Support for reading format JSON `PARSE_AS_TEXT=TRUE` into a `TEXT NOT NULL` column has been added. + +**Implemented column\_mismatch** + +Support for `ALLOW_COLUMN_MISMATCH` in `COPY INTO` has been added. + +**Corrected NULL behavior of `STRING_TO_ARRAY`** + +The behavior of `string_to_array` now matches its behavior in PostgreSQL. The change affects NULL delimiters where the string is split into individual characters, as well as empty strings and where the output is now an empty array. + +**Changed city\_hash behavior for nullable inputs** + +The behavior for `city_hash` has changed for nullable inputs. For example: + +``` +SELECT CITY_HASH([null]) = CITY_HASH(['']) +``` + +This is now false. + +**Function `ARRAY_AGG` now preserves NULLS** + +The `array_agg` function has been changed to return PostgreSQL-compliant results: + +- `array_agg` now preserves `NULL` values in its input, e.g. `select array_agg(x) from unnest(array [1,NULL,2] x)` returns `{1,NULL,2}` +- `array_agg` now returns `NULL` instead of an empty array if there are no input values + +**Lambda parameters are no longer supported by `array_sum`** + +Array aggregate functions no longer support lambda parameters. To get the old behavior for conditional lambda functions, use transform instead. For example: + +``` +array_sum(transform(...)) +``` + +**Explicit Parquet conversion from DATE to INT is now needed** + +A breaking change has been implemented in raising an error on reading a Parquet/ORC `DATE`/`TIMESTAMP` column if the `EXTERNAL TABLE` expects the column to have type `INT`/`BIGINT`. `DATE`/`TIMESTAMP` cannot be cast to `INT`/`BIGINT`, and external table scans will no longer allow this cast either. You need to explicitly transform the Parquet/ORC `DATE`/`TIMESTAMP` column with `EXTRACT`(`EPOCH FROM` col) to insert it into an `INT`/`BIGINT` column. 
+ +### [](#resolved-issues-1)Resolved issues + +- Fixed a bug where negation did not check for overflows correctly. + +## [](#db-version-333)DB version 3.33 + +**April 2024** + +- [Enhancements, changes, and new integrations](#enhancements-changes-and-new-integrations) +- [Resolved issues](#resolved-issues) + +### [](#enhancements-changes-and-new-integrations-3)Enhancements, changes and new integrations + +**Removed ‘element\_at’ Function** + +The `element_at` function for arrays has been removed and replaced with the `[]` operator. + +**Change of return type from BIGINT to INTEGER** + +The `index_of`/`array_position` function now returns INTEGER instead of BIGINT. + +**Removed LIMIT DISTINCT syntax** + +The `LIMIT_DISTINCT` syntax is no longer supported by Firebolt. + +**Updated CAST function behavior** + +All cast logic has been moved to runtime in Firebolt. The `castColumn` function is now replaced by `fbCastColumn`, ensuring consistent casting behavior and resolving issues with the `COPY FROM` operation and other cast calls. Uses of implicit/explicit `CAST` may result in errors due to this fix. + +New breaking change. + +### [](#resolved-issues-2)Resolved issues + +- Fixed a bug in `array_position` where searching for `NULL` in an array with non-null elements incorrectly returned a match in some cases. + +## [](#db-version-332)DB version 3.32 + +**April 2024** + +- [New features](#new-features) +- [Enhancements, changes, and new integrations](#enhancements-changes-and-new-integrations) +- [Resolved issues](#resolved-issues) + +### [](#new-features-16)New features + +**Expose and document ‘typeof’ as a toTypeName function** + +The `typeof` function has been added, which returns the data type of a SQL expression as a string. 
+
+### [](#enhancements-changes-and-new-integrations-4)Enhancements, changes and new integrations
+
+**Spilling Aggregations**
+
+Firebolt can now process most aggregations that exceed the available main memory of the engine by spilling to the SSD cache when needed. This happens transparently to the user. A query that made use of this capability will populate the `spilled_bytes` column in `information_schema.query_history`. Spilling does not support aggregations where a single group exceeds the available memory (e.g., `select count(distinct high_cardinality_column) from huge_table`) and may not yet work reliably for all aggregate functions or engine specs. We will continue improving the feature in upcoming releases.
+
+**No overflow detected in cast from FLOAT to DECIMAL**
+
+Fixed results of casting from `float32` to decimals with precision > 18. In addition to the correct results breaking change, there are certain queries involving overflow that were working before but will now fail.
+
+Example query:
+
+- `SELECT` 17014118346046923173168730371588410572::REAL::DECIMAL(37,0).
+
+Previously, this was working and returned a wrong result, but now it will fail with an overflow error.
+
+**ARRAY\_COUNT returns 0 instead of NULL**
+
+`ARRAY_COUNT` on `NULL` array now returns `0` instead of `NULL`.
+
+**No overflow check in arithmetic operations**
+
+Arithmetic operators (i.e. multiplication, addition, subtraction, and division) now perform correct overflow checking. This means that queries that used to return wrong results in the past now throw runtime errors.
+
+Example queries:
+
+- `SELECT` 4294967296 * 4294967296 -> now throws an error, before it would return 0
+- `SELECT` 9223372036854775807 + 9223372036854775807 -> now throws an error, before it would return -2
+- `SELECT` (a + b) * c -> this might throw runtime errors if there are large values in the column, but this is highly data dependent. 
+
+**Implement bool\_or/bool\_and aggregation functions**
+
+New aggregate functions bool\_or and bool\_and have been added.
+
+**Removed old deprecated REGENERATE AGGREGATING INDEX**
+
+‘REGENERATE AGGREGATING INDEX’ syntax has now been removed.
+
+**Align the syntax of our “escape” string literals with PostgreSQL**
+
+Escape [string literals](/sql_reference/data-types.html) now support octal and Unicode escape sequences. As a result, escape string literals now behave exactly like PostgreSQL. Example: `SELECT E'\U0001F525b\x6F\154t';` returns `🔥bolt`. If the setting `standard_conforming_strings` is not enabled for you, regular string literals (e.g., `SELECT 'foo';`) will also recognize the new escape sequences. However, we recommend exclusively using escape string literals for using escape sequences. Please be aware that you will get different results if you previously used (escape) string literals containing the syntax we now use for Unicode and octal escape sequences.
+
+**Change return value of length and octet\_length to INT**
+
+Length and array\_length now return INTEGER instead of BIGINT.
+
+**Subqueries in the GROUP BY/HAVING/ORDER BY clauses change**
+
+Subqueries in `GROUP BY/HAVING/ORDER BY` can no longer reference columns from the selection list of the outer query via their aliases as per PG compliance. `select 1 + 1 as a order by (select a);` used to work, but now fails with `unresolved name a` error.
+
+**Bytea serialization to CSV fix**
+
+Changed Bytea to CSV export: from escaped to non-escaped.
+
+Example:
+
+- `COPY` (select ‘a’::bytea) to ‘s3…’; the results will now be “\x61” instead of “\\x61”.
+
+### [](#resolved-issues-3)Resolved issues
+
+- Fixed results of casting literal float to numeric. In the past the float literal was casted to float first then to numeric, this caused us to lose precision. 
+
+Examples:
+
+- `SELECT` 5000000000000000000000000000000000000.0::DECIMAL(38,1); -> 5000000000000000000000000000000000000.0
+- `SELECT` (5000000000000000000000000000000000000.0::DECIMAL(38,1)+5000000000000000000000000000000000000.0::DECIMAL(38,1)); -> ERROR: overflow.
+
+Note that before, it was not an error and resulted in: 9999999999999999775261218463046128332.8.
+
+- Fixed a longstanding bug with >= comparison on external table source\_file\_name. Whereas this would previously have scraped fewer files than expected off the remote S3 bucket, you will now get all files properly (lexicographically) compared against the input predicate.
+
+
+
+- Fixed a bug when `USAGE ANY ENGINE` (and similar) privileges were shown for * account. Now it is shown for the current account.
+
+
+
+- Fixed a bug involving [‘btrim’](/sql_reference/functions-reference/string/btrim.html) string characters, where invoking `btrim`, `ltrim`, `rtrim`, or `trim` with a literal string but non-literal trim characters could result in an error.
+
+## [](#db-version-331)DB version 3.31
+
+**March 2024**
+
+- [New features](#new-features)
+- [Enhancements, changes, and new integrations](#enhancements-changes-and-new-integrations)
+- [Resolved issues](#resolved-issues)
+
+### [](#new-features-17)New features
+
+**PG compliant division**
+
+LQP2 has a new division operator that is PG compliant, by default.
+
+**Prevents usage of new line delimiter for schema inference**
+
+An error will now occur if schema inference is used with the option “delimiter” set to something other than the default.
+
+### [](#enhancements-changes-and-new-integrations-5)Enhancements, changes and new integrations
+
+**Simplified table protobuf representation**
+
+Unique constraints in tables will be blocked for new accounts.
+
+**Support for nullable arrays**
+
+Support has been added to allow the [ARRAY\_ANY\_MATCH](/sql_reference/functions-reference/Lambda/array-any-match.html) lambda function to work with nullable arrays. 
+
+**Updated AWS billing error message**
+
+The error message for an AWS billing issue on Engine Start was changed to add more information and clarity.
+
+**New requirements updated for EXPLAIN**
+
+For `EXPLAIN` queries, we now allow only one of the following options at the same time: `ALL`, `LOGICAL`, `PHYSICAL`, `ANALYZE`. `EXPLAIN (ALL)` now returns the plans in multiple rows instead of multiple columns.
+
+**Disabled Unix Time Functions**
+
+The following functions are not supported anymore: ‘from\_unixtime’, ‘to\_unix\_timestamp’, ‘to\_unix\_time’
+
+**Renamed spilled metrics columns**
+
+The columns `spilled_bytes_uncompressed` and `spilled_bytes_compressed` of `information_schema.query_history` have been replaced by a single column `spilled_bytes`. It contains the amount of data that was spilled to disk temporarily while executing the query.
+
+**Aggregating index placement**
+
+Aggregating index is now placed in the same namespace as tables and views.
+
+**Syntax and planner support for LATERAL scoping**
+
+[LATERAL](/Reference/reserved-words.html) is now a reserved keyword. It must now be used within double-quotes when using it as an object identifier.
+
+### [](#resolved-issues-4)Resolved issues
+
+Changed return for division by 0 from null to fail.
+
+Updated error log for upload failure for clarity.
+
+Fixed a bug in ‘unnest’ table function that occurred when not all of the ‘unnest’ columns were projected.
+
+Changed the behavior of [split\_part](/sql_reference/functions-reference/string/split-part.html) when an empty string is used as delimiter.
+
+Fixed a bug where floating point values `-0.0` and `+0.0`, as well as `-nan` and `+nan` were not considered equal in distributed queries.
+
+TRY\_CAST from TEXT to NUMERIC now works as expected: if the value cannot be parsed as NUMERIC it produces null. 
+ +## [](#db-version-330)DB version 3.30 + +**November 2023** + +- [New features](#new-features) +- [Enhancements, changes, and new integrations](#enhancements-changes-and-new-integrations) +- [Resolved issues](#resolved-issues) + +### [](#new-features-18)New features + +**New comparison operators** + +[New comparison operators](/sql_reference/operators.html) `IS DISTINCT FROM` and `IS NOT DISTINCT FROM` have been added. + +### [](#enhancements-changes-and-new-integrations-6)Enhancements, changes and new integrations + +**Support for nullable arrays** + +Support has been added to allow the ANY\_MATCH lambda function to work with nullable arrays + +### [](#resolved-issues-5)Resolved issues + +- Indirectly granted privileges have been removed from the `information_schema.object_privileges` view. +- Fixed an issue where `ARRAY_FIRST` and `ARRAY_FIRST_INDEX` returned an error if the given input was nullable. + +## [](#db-version-329)DB version 3.29 + +**October 2023** + +- [New features](#new-features) +- [Enhancements, changes, and new integrations](#enhancements-changes-and-new-integrations) + +### [](#new-features-19)New features + +**EXPLAIN ANALYZE now available for detailed query metrics** + +You can now use the [EXPLAIN command](/sql_reference/commands/queries/explain.html) to execute `EXPLAIN (ANALYZE) ` without executing the query. It shows how query processing is distributed over the nodes of an engine. + +### [](#enhancements-changes-and-new-integrations-7)Enhancements, changes and new integrations + +**Virtual column ‘source\_file\_timestamp’ uses new data type** + +The virtual column `source_file_timestamp` has been migrated from the data type `TIMESTAMP` (legacy timestamp type without time zone) to the type `TIMESTAMPTZ` (new timestamp type with time zone). + +Despite the increased resolution, the data is still in second precision as AWS S3 provides them only as unix seconds. 
+ +Use `source_file_timestamp - NOW()` instead of `DATE_DIFF('second', source_file_timestamp, NOW())` + +**New function added** + +A new alias [ARRAY\_TO\_STRING](/sql_reference/functions-reference/array/array-to-string.html) has been added to function `ARRAY_JOIN`. + +## [](#db-version-328)DB version 3.28 + +**September 2023** + +- [Resolved issues](#resolved-issues) + +### [](#resolved-issues-6)Resolved issues + +- `IN` expressions with scalar arguments now return Postgres-compliant results if there are `NULL`s in the `IN` list. +- information\_schema.running\_queries returns ID of a user that issued the running query, not the current user. +- Update error message to explain upper case behavior \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_reserved_words.md b/cmd/docs-scrapper/fireboltdocs/reference_reserved_words.md new file mode 100644 index 0000000..14636c2 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_reserved_words.md @@ -0,0 +1,117 @@ +# [](#reserved-words)Reserved words + +Reserved words have special meanings to the Firebolt SQL engine. We recommend that you do not use these words for naming databases, tables, columns, variables, or other objects that you define. If you must use a reserved word in this way, enclose it in quotation marks. 
+ +- ALL +- ALTER +- AND +- ARRAY +- BETWEEN +- BIGINT +- BOOL +- BOOLEAN +- BOTH +- CALL +- CASE +- CAST +- CHAR +- CONCAT +- COPY +- CREATE +- CROSS +- CURRENT\_DATE +- CURRENT\_TIMESTAMP +- DATABASE +- DATE +- DATETIME +- DECIMAL +- DELETE +- DESCRIBE +- DISTINCT +- DOUBLE +- DOUBLECOLON +- DOW +- DOY +- DROP +- EMPTY\_IDENTIFIER +- ENGINE +- EPOCH +- EXCEPT +- EXECUTE +- EXISTS +- EXPLAIN +- EXTRACT +- FALSE +- FETCH +- FIRST +- FLOAT +- FROM +- FULL +- GENERATE +- GROUP +- HAVING +- IF +- ILIKE +- IN +- INNER +- INSERT +- INT +- INTEGER +- INTERSECT +- INTERVAL +- IS +- ISNULL +- JOIN +- JOIN\_TYPE +- LATERAL +- LEADING +- LEFT +- LIKE +- LIMIT +- LIMIT\_DISTINCT +- LOCALTIMESTAMP +- LONG +- NATURAL +- NEXT +- NOT +- NULL +- NUMERIC +- OFFSET +- ON +- ONLY +- OR +- ORDER +- OUTER +- OVER +- PARTITION +- PRECISION +- PREPARE +- PRIMARY +- QUARTER +- RIGHT +- ROW +- ROWS +- SAMPLE +- SELECT +- SET +- SHOW +- SYSTEM +- TEXT +- TIME +- TIMESTAMP +- TOP +- TRAILING +- TRIM +- TRUE +- TRUNCATE +- UNION +- UNKNOWN\_CHAR +- UNNEST +- UNTERMINATED\_STRING +- UPDATE +- USING +- VARCHAR +- WEEK +- WHEN +- WHERE +- WITH \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/reference_system_settings.md b/cmd/docs-scrapper/fireboltdocs/reference_system_settings.md new file mode 100644 index 0000000..6943775 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/reference_system_settings.md @@ -0,0 +1,218 @@ +# [](#firebolt-system-settings)Firebolt system settings + +You can use a `SET` statement in a SQL script to configure aspects of Firebolt’s system behavior. Each statement is a query in its own right and must be terminated with a semi-colon (;). The `SET` statement cannot be included in other queries. This topic provides a list of available settings by function. + +## [](#setting-the-time-zone)Setting the time zone + +Use this setting to specify the session time zone. Time zone names are from the [Time Zone Database](http://www.iana.org/time-zones). 
You can see the list of tz database time zones [here](http://en.wikipedia.org/wiki/List_of_tz_database_time_zones). For times in the future, the latest known rule for the given time zone is applied. Firebolt does not support time zone abbreviations, as they cannot account for daylight saving time transitions, and some time zone abbreviations have meant different UTC offsets at different times. The default value of the `timezone` setting is UTC. + +### [](#syntax)Syntax + +``` +SET timezone = '' +``` + +### [](#example)Example + +The following code example demonstrates how setting the timezone parameter affects the interpretation and conversion of `TIMESTAMPTZ` values: + +``` +SET timezone = 'UTC'; +SELECT TIMESTAMPTZ '1996-09-03 11:19:33.123456 Europe/Berlin'; --> 1996-09-03 09:19:33.123456+00 +SELECT TIMESTAMPTZ '2023-1-29 6:3:42.7-3:30'; --> 2023-01-29 09:33:42.7+00 + +SET timezone = 'Israel'; +SELECT TIMESTAMPTZ '2023-1-29 12:21:49'; --> 2023-01-29 12:21:49+02 +SELECT TIMESTAMPTZ '2023-1-29Z'; --> 2023-01-29 02:00:00+02 +``` + +## [](#enable-parsing-for-literal-strings)Enable parsing for literal strings + +If set to `true`, strings are parsed without escaping, treating backslashes literally. By default, this setting is enabled. + +### [](#syntax-1)Syntax + +``` +SET standard_conforming_strings = [true|false] +``` + +### [](#example-1)Example + +The following code example demonstrates how setting `standard_conforming_strings` affects the interpretation of escape sequences in string literals: + +``` +SET standard_conforming_strings = false; +SELECT '\x3132'; --> 132 + +SET standard_conforming_strings = true; +SELECT '\x3132'; --> \x3132 +``` + +## [](#statement-timeout)Statement timeout + +Specifies the number of milliseconds a SQL statement is allowed to run. Any SQL statement or query exceeding the specified time is canceled. A value of zero, which is the default, disables the timeout. 
+ +### [](#syntax-2)Syntax + +``` +SET statement_timeout = ; +``` + +### [](#example-2)Example + +The following SQL example sets the query timeout to three seconds: + +``` +SET statement_timeout = 3000; +``` + +## [](#limit-the-number-of-result-rows)Limit the number of result rows + +When set to a value greater than zero, this setting limits the number of rows returned by `SELECT` statements. The query is executed as if an additional `LIMIT` clause is added to the SQL query. A value of zero or less means that no limit is applied. By default, no limit to the number of result rows is applied. + +### [](#syntax-3)Syntax + +``` +SET max_result_rows = ; +``` + +### [](#example-3)Example + +The following queries all return the same result. For the first query, no explicit settings are set: + +``` +SELECT * FROM table LIMIT 10000; + +SET max_result_rows = 10000; +SELECT * FROM table; + +SET max_result_rows = 10000; +SELECT * FROM table LIMIT 20000; +``` + +## [](#query-cancellation-mode-on-connection-drop)Query cancellation mode on connection drop + +Specify how the query should behave when the HTTP connection to Firebolt is dropped, such as when the UI window is closed. For this, you can choose between 3 different modes: + +- `NONE`: The query will not be canceled on connection drop +- `ALL` : The query will be canceled on connection drop +- `TYPE_DEPENDENT`: Only queries without side effects will be canceled, such as `SELECT`. + +The default is `TYPE_DEPENDENT`. + +### [](#syntax-4)Syntax + +``` +SET cancel_query_on_connection_drop = +``` + +### [](#example-4)Example + +The following code example demonstrates how to control query cancellation behavior when a connection drops using `none`, `all`, and `type_dependent` modes for `SET cancel_query_on_connection_drop`: + +``` +SET cancel_query_on_connection_drop = none; +INSERT INTO X [...] +SELECT * FROM X; + +SET cancel_query_on_connection_drop = all; +INSERT INTO X [...] 
+SELECT * FROM X; + +SET cancel_query_on_connection_drop = type_dependent; +INSERT INTO X [...] +SELECT * FROM X; +``` + +## [](#query-labelingtagging)Query labeling/tagging + +Use this option to label your query with a custom text. This simplifies query cancellation and retrieving the query status from system tables. + +### [](#syntax-5)Syntax + +``` +SET query_label = '' +``` + +### [](#example-5)Example + +The following code example assigns a query label to a query using `SET query_label`, allowing you to track it in `information_schema`, `engine_running_queries`, and `information_schema.engine_query_history`. It then demonstrates how to retrieve the `QUERY_ID` for the labeled query and cancel it using `CANCEL QUERY`: + +``` +SET query_label = 'Hello Firebolt'; +SELECT * FROM X; + +SET query_label = ''; + +SELECT query_id, * FROM information_schema.engine_running_queries WHERE query_label = 'Hello Firebolt' +SELECT query_id, * FROM information_schema.engine_query_history WHERE query_label = 'Hello Firebolt' + +CANCEL QUERY WHERE query_id = '' +``` + +## [](#multi-cluster-engine-warmup)Multi-cluster engine warmup + +Use this option to distribute queries across all clusters of an engine, simplifying the process of initializing cached data to a consistent state across all clusters after a `START ENGINE` or `ALTER ENGINE` operation. + +Warmup queries complete after they have run on all clusters of the engine. The queries return an empty result if they succeed on all clusters. If the query fails on any cluster, it returns an error. If multiple errors occur, only one error is returned. 
+ +### [](#syntax-6)Syntax + +``` +SET warmup = true; +``` + +### [](#example-6)Example + +The following code example activates the warmup mode so that the query runs on `production_table` using all clusters of an engine, and returns an empty result upon success: + +``` +USE ENGINE multi_cluster_engine; +SET warmup = true; +SELECT checksum(*) FROM production_table; +SET warmup = false; +``` + +## [](#result-cache)Result cache + +Set `enable_result_cache` to `FALSE` to disable the use of Firebolt’s [result cache](/Overview/queries/understand-query-performance-subresult.html), which is set to `TRUE` by default. Disabling result caching can be useful for benchmarking query performance. When `enable_result_cache` is disabled, resubmitting the same query will recompute the results rather than retrieving them from cache. + +### [](#syntax-7)Syntax + +``` +SET enable_result_cache = [true|false]; +``` + +### [](#example-7)Example + +The following code example disables the result cache so that no previously cached results are used, and no new cache entries are written: + +``` +SET enable_result_cache = false; +SELECT checksum(*) FROM production_table; +``` + +## [](#subresult-cache)Subresult cache + +Firebolt implements [advanced cross-query optimization](/Overview/queries/understand-query-performance-subresult.html) that allows SQL queries to reuse intermediate query execution states from previous requests. Subresult caching operates at a semantic level, which allows Firebolt to understand and optimize queries based on the meaning and context of the data rather than solely based on their syntax or structure. This capability allows Firebolt to optimize across different query patterns for improved efficiency. + +Set `enable_subresult_cache` to `FALSE` to disable Firebolt’s subresult caching, which is set to `TRUE` by default. + +Disabling subresult caching is generally **not recommended**, as it can negatively impact query performance, especially for complex workloads. 
For most benchmarking scenarios, disable the result cache instead, as described in the previous [Result cache](#result-cache) section. This approach affects only the final result caching while preserving the benefits of subresult optimizations. + +### [](#syntax-8)Syntax + +``` +SET enable_subresult_cache = [true|false]; +``` + +### [](#example-8)Example + +The following code example disables the subresult cache so no previously cached subresult is used and no new cache entries are written by this query: + +``` +SET enable_subresult_cache = false; +SELECT count(*) FROM fact_table INNER JOIN dim_table ON (a = b); +``` + +Setting `enable_subresult_cache` to `FALSE` disables the use of all [cached subresults](/Overview/queries/understand-query-performance-subresult.html). In particular, it deactivates two caching mechanisms that normally speed up query runtimes: the use of the `MaybeCache` operator, which includes the full result cache, and the hash-table cache used by the `Join` operator. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference.md b/cmd/docs-scrapper/fireboltdocs/sql_reference.md new file mode 100644 index 0000000..54a2315 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference.md @@ -0,0 +1,12 @@ +# [](#sql-reference)SQL reference + +Reference SQL syntax for functions and commands, as well as available data type and operators. Check available views for observing metadata. 
+ +* * * + +- [Data types](/sql_reference/data-types.html) +- [Information schema](/sql_reference/information-schema/) +- [Operators](/sql_reference/operators.html) +- [Functions glossary](/sql_reference/functions-reference/functions-glossary.html) +- [SQL commands](/sql_reference/commands/) +- [SQL functions](/sql_reference/functions-reference/functions-reference.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_bytea_data_type.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_bytea_data_type.md new file mode 100644 index 0000000..63d3c06 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_bytea_data_type.md @@ -0,0 +1,130 @@ +# [](#bytea-data-type)BYTEA data type + +This topic describes the Firebolt implementation of the `BYTEA` data type. + +Not all functions support the `BYTEA` data type currently. For more information, see [BYTEA functions](/sql_reference/functions-reference/bytea/) + +- [Overview](#overview) + + - [Type conversions](#type-conversions) + - [Comparison operator](#comparison-operator) + - [Literal string interpretation](#literal-string-interpretation) + - [Output format](#output-format) + - [Importing `BYTEA` from external source](#importing-bytea-from-external-source) + +## [](#overview)Overview + +The `BYTEA` data type is a variable length binary string data type, commonly used to store binary data, like images, other multimedia files, or raw bytes of information. A binary string is a sequence of bytes - unlike `TEXT`, there is no character set. The `BYTEA` data type is nullable. + +### [](#type-conversions)Type conversions + +The `BYTEA` type can be cast to and from the `TEXT` data type. A cast from `BYTEA` to `TEXT` will interpret the binary string to a hexadecimal representation with `\x` as a prefix. For example `SELECT 'a'::BYTEA::TEXT` returns `\x61`. 
+ +Cast from `TEXT` to `BYTEA` supports two formats, **hex** and **escaped**: + +**Hex** +Using hex format, the `TEXT` type data must start with `\x`. Characters `\n`, `\t`, `\r` and ‘ ‘ are ignored if they are not in sequence of two characters representing one byte. Each character must be in one of the following ranges: `a-f`, `A-F`, `0-9`. +Characters must be in pairs. For example, `\x aa` is a valid hex format, but `\xa a` is invalid. + +**Escape** +Using escape format, an escaped backslash becomes just a single backslash: `\\` -> `\`. One backslash must be followed by 3 numbers representing octal value (base 8) in range of `000-377`. For example, `a \375` + +In addition to casting, the [ENCODE](/sql_reference/functions-reference/bytea/encode.html) and [DECODE](/sql_reference/functions-reference/bytea/decode.html) functions can be used to represent `TEXT` as `BYTEA` and vice versa, but will behave slightly differently. For example, `SELECT ENCODE('1'::BYTEA, 'HEX');` returns `31`, while `SELECT CAST('1'::BYTEA as TEXT);` returns `\x31`, both of type `TEXT`. + +### [](#comparison-operator)Comparison operator + +The `BYTEA` comparison operator will work as lexicographical comparison but with bytes. Two empty `BYTEA` type expressions are equal. If two `BYTEA` type expressions have equivalent bytes and are of the same length, then they are equal. In a greater than (>) or less than (<) comparison, two `BYTEA` type expressions are compared byte by byte, and the first mismatching byte defines which is greater or less than the other. + +**Examples:** + +`SELECT '\xa3'::BYTEA > '\xa2'::BYTEA;` returns `TRUE`. + +`SELECT '\xa3'::BYTEA = '\xa300'::BYTEA;` returns `FALSE`. + +`SELECT '\xa3'::BYTEA < '\xa300'::BYTEA;` returns `TRUE`. 
+ +### [](#literal-string-interpretation)Literal string interpretation + +Literal strings will be interpreted according to the setting [`standard_conforming_strings`,](/Reference/system-settings.html#enable-parsing-for-literal-strings) which controls whether strings are parsed with or without escaping. Similar to [CAST](/sql_reference/functions-reference/conditional-and-miscellaneous/cast.html) from `TEXT` to `BYTEA`, the two text formats hex and escape are supported. + +**Examples:** + +``` +SET standard_conforming_strings = false; +SELECT '\x3132'::BYTEA; -> '\x313332' +SELECT '\x31 32 '::BYTEA; -> '\x3120202033322020' +SELECT 'a b\230a'::BYTEA; -> '\x61206232333061' + +set standard_conforming_strings = true; +SELECT '\x3132'::BYTEA; -> '\x3132' +SELECT '\x31 32 '::BYTEA; -> '\x3132' +SELECT 'a b\230a'::BYTEA; -> '\x6120629861' +``` + +### [](#output-format)Output format + +The output format for `BYTEA` is the hexadecimal representation of the bytes in lower case prefixed by `\x` (Note: in JSON `\` is escaped). + +**Example:** + +``` +SELECT 'a'::BYTEA; +``` + +**Returns:** + +``` +{ + "data": + [ + ["\\x61"] + ] +} +``` + +### [](#importing-bytea-from-external-source)Importing `BYTEA` from external source + +The input format for importing binary data from an external source depends on the external file format. + +**For ORC or PARQUET files:** For a specific field type without annotation (UTF-8 for example): BYTE\_ARRAY (binary), bytes will be imported exactly as they are in the source. All the other types will be imported to the corresponding datatype (for example, BYTE\_ARRAY with UTF-8 annotation will be imported to `TEXT` data type) and then cast to type `BYTEA`. + +**For CSV, TSV, or JSON files:** The input data are read exactly as they are in the source, and then cast to data type `BYTEA`. + +JSON files must be UTF-8 encoded; however this is not required for CSV and TSV files. 
In the case that these files are not UTF-8 encoded, field values must not start with `\x` - data starting with the characters `\x` will throw an error on ingest. Any data not starting with the characters `\x` will be copied as bytes to the column of data type `BYTEA`. + +**CSV File Example:** + +*file* + +```csv +'row1' +'a�a' +'\xaabf' +15 +'15' +``` + +**SQL** + +``` +CREATE EXTERNAL TABLE ex_table +( + column1 BYTEA +) URL = 's3://...' + OBJECT_PATTERN = '...' + TYPE = (CSV); + +SELECT * FROM ex_table; +``` + +**Returns:** + +```table +| column1 | +| ---------- | +| \x726f7731 | +| \x61ff61 | +| \xaabf | +| \x3135 | +| \x3135 | +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands.md new file mode 100644 index 0000000..fff1652 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands.md @@ -0,0 +1,91 @@ +# [](#sql-commands)SQL commands + +Use the alphabetical list in the navigation pane to find the syntax for commands that you already know. + +Use the functional list below to find commands for a specific task area that you’re working in. + +- [Queries](#queries) + Analyze data with `SELECT`. Tune and optimize query performance with other commands. +- [Data management](#data-management) + Move data between your data lake and Firebolt and between Firebolt resources. +- [Data definition](#data-definition) + Data definition language. Create, alter, drop, and otherwise manage objects like databases, tables, and views in your Firebolt account. +- [Engines](#engines) + Start, stop, and manage Firebolt engines. +- [Metadata](#metadata) + Query the Firebolt information schema for metadata related to its objects and resources. +- [Access control](#access-control) + Access control language. Create, alter and drop, and otherwise manage users, logins, service accounts and roles. 
+ +## [](#queries)Queries + +- [EXPLAIN](/sql_reference/commands/queries/explain.html) +- [RECOMMEND DDL](/sql_reference/commands/queries/recommend_ddl.html) +- [SELECT](/sql_reference/commands/queries/select.html) + +## [](#data-management)Data management + +- [COPY FROM](/sql_reference/commands/data-management/copy-from.html) +- [COPY TO](/sql_reference/commands/data-management/copy-to.html) +- [DELETE](/sql_reference/commands/data-management/delete.html) +- [INSERT](/sql_reference/commands/data-management/insert.html) +- [TRUNCATE TABLE](/sql_reference/commands/data-management/truncate-table.html) +- [UPDATE](/sql_reference/commands/data-management/update.html) +- [VACUUM](/sql_reference/commands/data-management/vacuum.html) + +## [](#data-definition)Data definition + +- [ALTER ACCOUNT](/sql_reference/commands/data-definition/alter-account.html) +- [ALTER DATABASE](/sql_reference/commands/data-definition/alter-database.html) +- [ALTER TABLE](/sql_reference/commands/data-definition/alter-table.html) +- [CREATE ACCOUNT](/sql_reference/commands/data-definition/create-account.html) +- [CREATE AGGREGATING INDEX](/sql_reference/commands/data-definition/create-aggregating-index.html) +- [CREATE DATABASE](/sql_reference/commands/data-definition/create-database.html) +- [CREATE EXTERNAL TABLE](/sql_reference/commands/data-definition/create-external-table.html) +- [CREATE FACT or DIMENSION TABLE](/sql_reference/commands/data-definition/create-fact-dimension-table.html) +- [CREATE VIEW](/sql_reference/commands/data-definition/create-view.html) +- [DROP ACCOUNT](/sql_reference/commands/data-definition/drop-account.html) +- [DROP DATABASE](/sql_reference/commands/data-definition/drop-database.html) +- [DROP INDEX](/sql_reference/commands/data-definition/drop-index.html) +- [DROP TABLE](/sql_reference/commands/data-definition/drop-table.html) +- [DROP VIEW](/sql_reference/commands/data-definition/drop-view.html) +- [USE 
DATABASE](/sql_reference/commands/data-definition/use-database.html) + +## [](#engines)Engines + +- [ALTER ENGINE](/sql_reference/commands/engines/alter-engine.html) +- [CREATE ENGINE](/sql_reference/commands/engines/create-engine.html) +- [DROP ENGINE](/sql_reference/commands/engines/drop-engine.html) +- [START ENGINE](/sql_reference/commands/engines/start-engine.html) +- [STOP ENGINE](/sql_reference/commands/engines/stop-engine.html) +- [USE ENGINE](/sql_reference/commands/engines/use-engine.html) + +## [](#metadata)Metadata + +- [DESCRIBE](/sql_reference/commands/metadata/describe.html) +- [SHOW CACHE](/sql_reference/commands/metadata/show-cache.html) +- [SHOW CATALOGS](/sql_reference/commands/metadata/show-catalogs.html) +- [SHOW COLUMNS](/sql_reference/commands/metadata/show-columns.html) +- [SHOW ENGINES](/sql_reference/commands/metadata/show-engines.html) +- [SHOW INDEXES](/sql_reference/commands/metadata/show-indexes.html) +- [SHOW TABLES](/sql_reference/commands/metadata/show-tables.html) +- [SHOW VIEWS](/sql_reference/commands/metadata/show-views.html) + +## [](#access-control)Access control + +- [ALTER LOGIN](/sql_reference/commands/access-control/alter-login.html) +- [ALTER NETWORK POLICY](/sql_reference/commands/access-control/alter-network-policy.html) +- [ALTER SERVICE ACCOUNT](/sql_reference/commands/access-control/alter-service-account.html) +- [ALTER USER](/sql_reference/commands/access-control/alter-user.html) +- [CREATE LOGIN](/sql_reference/commands/access-control/create-login.html) +- [CREATE NETWORK POLICY](/sql_reference/commands/access-control/create-network-policy.html) +- [CREATE ROLE](/sql_reference/commands/access-control/create-role.html) +- [CREATE SERVICE ACCOUNT](/sql_reference/commands/access-control/create-service-account.html) +- [CREATE USER](/sql_reference/commands/access-control/create-user.html) +- [DROP LOGIN](/sql_reference/commands/access-control/drop-login.html) +- [DROP NETWORK 
POLICY](/sql_reference/commands/access-control/drop-network-policy.html) +- [DROP ROLE](/sql_reference/commands/access-control/drop-role.html) +- [DROP SERVICE ACCOUNT](/sql_reference/commands/access-control/drop-service-account.html) +- [DROP USER](/sql_reference/commands/access-control/drop-user.html) +- [GRANT](/sql_reference/commands/access-control/grant.html) +- [REVOKE](/sql_reference/commands/access-control/revoke.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control.md new file mode 100644 index 0000000..bbae0dd --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control.md @@ -0,0 +1,21 @@ +## [](#access-control)Access control + +* * * + +- [ALTER LOGIN](/sql_reference/commands/access-control/alter-login.html) +- [ALTER NETWORK POLICY](/sql_reference/commands/access-control/alter-network-policy.html) +- [ALTER ROLE](/sql_reference/commands/access-control/alter-role.html) +- [ALTER SERVICE ACCOUNT](/sql_reference/commands/access-control/alter-service-account.html) +- [ALTER USER](/sql_reference/commands/access-control/alter-user.html) +- [CREATE LOGIN](/sql_reference/commands/access-control/create-login.html) +- [CREATE NETWORK POLICY](/sql_reference/commands/access-control/create-network-policy.html) +- [CREATE ROLE](/sql_reference/commands/access-control/create-role.html) +- [CREATE SERVICE ACCOUNT](/sql_reference/commands/access-control/create-service-account.html) +- [CREATE USER](/sql_reference/commands/access-control/create-user.html) +- [DROP LOGIN](/sql_reference/commands/access-control/drop-login.html) +- [DROP NETWORK POLICY](/sql_reference/commands/access-control/drop-network-policy.html) +- [DROP ROLE](/sql_reference/commands/access-control/drop-role.html) +- [DROP SERVICE ACCOUNT](/sql_reference/commands/access-control/drop-service-account.html) +- [DROP 
USER](/sql_reference/commands/access-control/drop-user.html) +- [GRANT](/sql_reference/commands/access-control/grant.html) +- [REVOKE](/sql_reference/commands/access-control/revoke.html) \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_login.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_login.md new file mode 100644 index 0000000..912b93e --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_login.md @@ -0,0 +1,44 @@ +# [](#alter-login)ALTER LOGIN + +Updates the configuration of the specified login. + +For more information, see [Managing logins](/Guides/managing-your-organization/managing-logins.html). + +## [](#syntax)Syntax + +``` +ALTER LOGIN SET + [ IS_PASSWORD_ENABLED = { TRUE | FALSE } ] + [ IS_MFA_ENABLED = { TRUE | FALSE } ] + [ NETWORK_POLICY = | DEFAULT ] + [ IS_ORGANIZATION_ADMIN = { TRUE | FALSE } ] + [ IS_ENABLED = { TRUE | FALSE } ] + [ FIRST_NAME = ] + [ LAST_NAME = ] +``` + +or + +``` +ALTER LOGIN RENAME TO ; +``` + +A login that was provisioned via SSO cannot be renamed. + +## [](#parameters)Parameters + +Parameter Description `` The name of the login in the form of an email address. The login must be unique within the organization. `IS_PASSWORD_ENABLED` A `BOOLEAN` value specifying if login with password is enabled. By default this is `TRUE` and passwords can be used to log in. `IS_MFA_ENABLED` A `BOOLEAN` value specifying if the login has multi-factor authentication (MFA) enabled. By default this value is `FALSE`. If set to `TRUE`, an enrollment email will be sent to the ``. `` An optional parameter to define the network policy to link to the created login. Specifying `DEFAULT` will detach any linked network policy. `IS_ORGANIZATION_ADMIN` A `BOOLEAN` value specifying if the login is an organization admin. By default this value is `FALSE`. 
`IS_ENABLED` A `BOOLEAN` value specifying whether authentication with this login should be possible. Disable login if you want to prevent access to the system without dropping it. ``, `` The first and last name of the user to use the login. If the parameter is included, these values cannot be empty. `` The new name of the login in the form of an email address. The login must be unique within the organization. Note: if the login was provisioned via SSO, it cannot be renamed. + +## [](#example)Example + +The following command will rename the “alexs@acme.com” login to “alexspotter@acme.com”. + +``` +ALTER LOGIN "alexs@acme.com" RENAME TO "alexspotter@acme.com"; +``` + +This command will link the network policy “my\_network\_policy” to the “alexs@acme.com” login. + +``` +ALTER LOGIN "alexs@acme.com" SET NETWORK_POLICY = "my_network_policy"; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_network_policy.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_network_policy.md new file mode 100644 index 0000000..a02bd4d --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_network_policy.md @@ -0,0 +1,52 @@ +# [](#alter-network-policy)ALTER NETWORK POLICY + +Updates the configuration of the existing network policy by specifying its name, a list of internet protocol (IP) addresses to allow or block, and an optional description. + +For more information, see [Network policies](/Guides/security/network-policies.html). + +## [](#syntax)Syntax + +``` +ALTER NETWORK POLICY [ IF EXISTS ] + SET [ ALLOWED_IP_LIST = ( '', '' ... ) ] + [ BLOCKED_IP_LIST = ( '', '' ... ) ] + [ DESCRIPTION = '' ] + +ALTER NETWORK POLICY [ IF EXISTS ] + ADD [ ALLOWED_IP_LIST = ( '', '' ... ) ] + [ BLOCKED_IP_LIST = ( '', '' ... ) ] + +ALTER NETWORK POLICY [ IF EXISTS ] + REMOVE [ ALLOWED_IP_LIST = ( '', '' ... ) ] + [ BLOCKED_IP_LIST = ( '', '' ... 
) ] +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the network policy to edit. `` A comma-separated and quoted list of IP addresses to **allow** in the specified network policy. `` A comma-separated and quoted list of IP addresses to **block** in the specified network policy. `` (Optional) A description for the specified network policy. + +## [](#examples)Examples + +**Example** + +The following code example modifies the existing network policy ‘my\_network\_policy’ by replacing its allowed and blocked IP lists with specified values and updating its description: + +``` +ALTER NETWORK POLICY my_network_policy SET ALLOWED_IP_LIST = ('4.5.6.7', '2.4.5.7') BLOCKED_IP_LIST = ('6.7.8.9') DESCRIPTION = 'updated network policy' +``` + +**Example** + +The following code example adds an IP address `192.168.5.1` to the allowed list of the existing network policy `my_network_policy`: + +``` +ALTER NETWORK POLICY my_network_policy ADD ALLOWED_IP_LIST = ('192.168.5.1'); +``` + +**Example** + +The following code example removes the IP address `6.7.8.9` from the blocked list of the network policy `my_network_policy`: + +``` +ALTER NETWORK POLICY my_network_policy REMOVE BLOCKED_IP_LIST = ('6.7.8.9'); +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_role.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_role.md new file mode 100644 index 0000000..f90060a --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_role.md @@ -0,0 +1,17 @@ +# [](#alter-role)ALTER ROLE + +Updates the specified role. + +## [](#alter-role-owner-to)ALTER ROLE OWNER TO + +Change the owner of a role. + +### [](#syntax)Syntax + +``` +ALTER ROLE OWNER TO +``` + +### [](#parameters)Parameters + +Parameter Description `` Name of the role to change the owner of. `` The new owner of the role. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_service_account.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_service_account.md new file mode 100644 index 0000000..097248b --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_service_account.md @@ -0,0 +1,49 @@ +# [](#alter-service-account)ALTER SERVICE ACCOUNT + +Updates the configuration of the specified service account. + +For more information, see [Service accounts](/Guides/managing-your-organization/service-accounts.html). + +## [](#syntax)Syntax + +``` +ALTER SERVICE ACCOUNT SET + [ NETWORK_POLICY = | DEFAULT ] + [ DESCRIPTION = | DEFAULT ] + [ IS_ORGANIZATION_ADMIN = { TRUE | FALSE } ] + [ CONNECTION_PREFERENCE = { PUBLIC_ONLY | PRIVATE_ONLY | PREFER_PUBLIC | PREFER_PRIVATE | DEFAULT } ] + [ IS_ENABLED = { TRUE | FALSE } ] +``` + +or + +``` +ALTER SERVICE ACCOUNT RENAME TO ; +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the service account to edit. `` An optional description for the service account. `` An optional parameter to define the network policy to link to the edited service account. Specifying `DEFAULT` will detach any linked network policy. `IS_ORGANIZATION_ADMIN` A `BOOLEAN` value specifying if the service account is an organization admin. By default, this value is `FALSE`. `IS_ENABLED` A `BOOLEAN` value specifying whether authentication with this service account should be possible. Disable the service account to prevent access without dropping it. `CONNECTION_PREFERENCE` Defines how the service account connects to Firebolt. The default value is `PREFER_PUBLIC` if not specified. See **Connection preferences** for details. `` The new name of the service account. Must start with a letter and may contain only alphanumeric, digit, or underscore (\_) characters. 
+ +### [](#connection-preferences)Connection Preferences + +The `CONNECTION_PREFERENCE` parameter determines how a [service account](/Guides/managing-your-organization/service-accounts.html) accesses Firebolt: + +- **`PUBLIC_ONLY`** : Allows access only through public APIs. +- **`PRIVATE_ONLY`** : Allows access only through AWS PrivateLink. +- **`PREFER_PUBLIC`** (Default): Prefers public APIs but can use AWS PrivateLink if needed. +- **`PREFER_PRIVATE`** : Prefers AWS PrivateLink but can use public APIs if needed. + +## [](#example)Example + +The following code example renames the service account `sa1` to `serviceaccount1`: + +``` +ALTER SERVICE ACCOUNT "sa1" RENAME TO "serviceaccount1"; +``` + +The following code example assigns the network policy `my_network_policy` to the `serviceaccount1` service account: + +``` +ALTER SERVICE ACCOUNT "serviceaccount1" SET NETWORK_POLICY = "my_network_policy"; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_user.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_user.md new file mode 100644 index 0000000..2d31a56 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_alter_user.md @@ -0,0 +1,59 @@ +# [](#alter-user)ALTER USER + +Updates the configuration of the specified user. + +For more information, see [Managing users](/Guides/managing-your-organization/managing-users.html). + +Users can modify most of their own account settings without requiring [RBAC](/Overview/Security/Role-Based%20Access%20Control/#role-based-access-control-rbac) permissions, except when altering [LOGIN](/Guides/managing-your-organization/managing-logins.html) configurations or a [SERVICE ACCOUNT](/Guides/managing-your-organization/service-accounts.html). 
+ +## [](#alter-user-set)ALTER USER SET + +### [](#syntax)Syntax + +``` +ALTER USER SET + [ LOGIN = | DEFAULT ] + [ SERVICE_ACCOUNT = | DEFAULT ] + [ DEFAULT_DATABASE = | DEFAULT ] + [ DEFAULT_ENGINE = | DEFAULT ]; +``` + +or + +``` +ALTER USER RENAME TO ; +``` + +### [](#parameters)Parameters + +Parameter Description `` The name of the user, may contain non-alpha-numeric characters such as exclamation points (!), percent signs (%), dots (.), underscores (\_), dashes (-), and asterisks (\*). Strings containing non-alphanumeric characters must be enclosed in single or double quotes. For more information about the full set of naming rules, see the [object identifiers guide](/Reference/object-identifiers.html#user-names). `` The new name of the user, used with the `RENAME TO` option. The new user name can be any string, and can also contain spaces and non-alpha-numeric characters such as exclamation points (!), percent signs (%), at signs(@), dot signs (.), underscore signs (\_), minus signs (-), and asterisks (\*). If the string contains spaces or non-alphanumeric characters, it must be enclosed in single or double quotes. `` An optional, case-insensitive parameter to specify the name of the login to link the user with, used with the `SET` option. This cannot be used in conjunction with the `SERVICE_ACCOUNT` parameter - a user can be linked to a login OR a service account but not both. `DEFAULT` disassociates the user from its login. The user will become unusable before it’s re-associated with some login or service account. `` An optional parameter to specify the name of the service account to link the user with, used with the `SET` option. This cannot be used in conjunction with the `LOGIN` parameter - a user can be linked to a login OR a service account but not both. `DEFAULT` disassociates the user from its service account. The user will become unusable before it’s re-associated with some login or service account. 
`` An optional parameter to define a default database for the user (for future purposes). Used with the `SET` option. `` An optional parameter to define a default engine for the user (for future purposes).Used with the `SET` option. + +### [](#example)Example + +The following command will rename the “alex” account to “alexs”. + +``` +ALTER USER "alex" RENAME TO "alexs"; +``` + +This command will link the user “alex” to the “alexs@acme.com” login. + +``` +ALTER USER alex SET LOGIN="alexs@acme.com"; +``` + +## [](#alter-user-owner-to)ALTER USER OWNER TO + +Change the owner of a user. The current owner of a user can be viewed in the `information_schema.users` view on `user_owner` column. + +Check [ownership](/Guides/security/ownership.html) page for more info. + +### [](#syntax-1)Syntax + +``` +ALTER USER OWNER TO +``` + +### [](#parameters-1)Parameters + +Parameter Description `` The name of the user to change the owner of. `` The new owner of the user. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_login.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_login.md new file mode 100644 index 0000000..a1572d3 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_login.md @@ -0,0 +1,29 @@ +# [](#create-login)CREATE LOGIN + +Creates a new login. + +For more information, see [Managing logins](/Guides/managing-your-organization/managing-logins.html). + +## [](#syntax)Syntax + +``` +CREATE LOGIN [ IF NOT EXISTS ] + WITH [ IS_PASSWORD_ENABLED = { TRUE | FALSE }] + [ IS_MFA_ENABLED = { TRUE | FALSE }] + [ NETWORK_POLICY = ] + [ IS_ORGANIZATION_ADMIN = { TRUE | FALSE }] + FIRST_NAME = , + LAST_NAME = +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the login in the form of an email address. The login must be unique within the organization. 
`IS_PASSWORD_ENABLED` A `BOOLEAN` value specifying if login with password is enabled. By default this is `TRUE` and passwords can be used to log in. `IS_MFA_ENABLED` A `BOOLEAN` value specifying if the login has multi-factor authentication (MFA) enabled. By default this value is `FALSE`. If set to `TRUE`, an enrollment email will be sent to the ``. `` An optional parameter to define the network policy to link to the created login. `IS_ORGANIZATION_ADMIN` A `BOOLEAN` value specifying if the login is an organization admin. By default this value is `FALSE`. ``, `` The first and last name of the user that will use the created login.
+
+## [](#example)Example
+
+The following command will create the “alexs@acme.com” login with the first name ‘Alex’ and the last name ‘Summers’.
+
+```
+CREATE LOGIN "alexs@acme.com" WITH FIRST_NAME = 'Alex' LAST_NAME = 'Summers';
+```
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_network_policy.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_network_policy.md
new file mode 100644
index 0000000..8f4af42
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_network_policy.md
@@ -0,0 +1,26 @@
+# [](#create-network-policy)CREATE NETWORK POLICY
+
+Creates a new network policy.
+
+For more information, see [Network policies](/Guides/security/network-policies.html).
+
+## [](#syntax)Syntax
+
+```
+CREATE NETWORK POLICY [IF NOT EXISTS]
+WITH ALLOWED_IP_LIST = ( '', '', ... )
+[ BLOCKED_IP_LIST = ( '', '', ... ) ]
+[ DESCRIPTION = '' ]
+```
+
+## [](#parameters)Parameters
+
+Parameter Description `` The name of the network policy. Must start with a letter, and may contain only alphanumeric and underscore(\_) characters. `` The comma-separated and quoted list of IP addresses to allow in the created network policy. `` An optional comma-separated and quoted list of IP addresses to block in the created network policy.
`` An optional description for the created network policy.
+
+## [](#example)Example
+
+The following command will create a network policy that allows IPs ‘4.5.6.1’ and ‘2.4.5.1’ and blocks the IP address ‘6.7.8.1’, with a description:
+
+```
+CREATE NETWORK POLICY my_network_policy WITH ALLOWED_IP_LIST = ('4.5.6.1', '2.4.5.1') BLOCKED_IP_LIST = ('6.7.8.1') DESCRIPTION = 'my new network policy'
+```
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_role.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_role.md
new file mode 100644
index 0000000..b6ef59f
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_role.md
@@ -0,0 +1,33 @@
+# [](#create-role)CREATE ROLE
+
+Creates a new role.
+
+For more information, see [Role-based access control](/Guides/security/rbac.html).
+
+## [](#syntax)Syntax
+
+```
+CREATE ROLE [ IF NOT EXISTS ] 
+```
+
+## [](#parameters)Parameters
+
+Parameter Description `` The name of the role.
+
+## [](#example)Example
+
+The following command will create a role “user\_role”
+
+```
+CREATE ROLE user_role;
+```
+
+## [](#example-2)Example 2
+
+The following command will create a role “my\_role\_2”
+
+```
+CREATE ROLE IF NOT EXISTS my_role_2
+```
+
+If “my\_role\_2” exists, no error message is thrown.
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_service_account.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_service_account.md
new file mode 100644
index 0000000..99c1529
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_service_account.md
@@ -0,0 +1,38 @@
+# [](#create-service-account)CREATE SERVICE ACCOUNT
+
+Creates a new service account.
+
+For more information, see [Service accounts](/Guides/managing-your-organization/service-accounts.html).
+ +## [](#syntax)Syntax + +``` +CREATE SERVICE ACCOUNT [ IF NOT EXISTS ] + [ WITH + [ DESCRIPTION = ] + [ NETWORK_POLICY = ] + [ IS_ORGANIZATION_ADMIN = { TRUE|FALSE } ] + [ CONNECTION_PREFERENCE = { PUBLIC_ONLY | PRIVATE_ONLY | PREFER_PUBLIC | PREFER_PRIVATE | DEFAULT } ] + ] +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the service account. Must start with a letter and may contain only alphanumeric, digit, or underscore (\_) characters. `` An optional description for the service account. `` An optional parameter to define the network policy to link to the created service account. `IS_ORGANIZATION_ADMIN` A `BOOLEAN` value specifying if the login is an organization admin. By default, this value is `FALSE`. `CONNECTION_PREFERENCE` Defines how the service account connects to Firebolt. The default value is `PREFER_PUBLIC` if not specified. See **Connection preferences** for details. + +### [](#connection-preferences)Connection preferences + +The `CONNECTION_PREFERENCE` parameter determines how a [service account](/Guides/managing-your-organization/service-accounts.html) accesses Firebolt: + +- **`PUBLIC_ONLY`** : Allows access only through public APIs. +- **`PRIVATE_ONLY`** : Allows access only through AWS PrivateLink. +- **`PREFER_PUBLIC`** (Default): Prefers public APIs but can use AWS PrivateLink if needed. +- **`PREFER_PRIVATE`** : Prefers AWS PrivateLink but can use public APIs if needed. 
+ +## [](#example)Example + +The following code example creates a service account `sa1` linked to the `my_network_policy` network policy: + +``` +CREATE SERVICE ACCOUNT "sa1" WITH NETWORK_POLICY = my_network_policy +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_user.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_user.md new file mode 100644 index 0000000..f8d67c1 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_create_user.md @@ -0,0 +1,29 @@ +# [](#create-user)CREATE USER + +Creates a new user in Firebolt. + +For more information, see [Managing users and roles](/Guides/managing-your-organization/managing-users.html). + +## [](#syntax)Syntax + +``` +CREATE USER [ IF NOT EXISTS ] +[ WITH +[ LOGIN = | SERVICE_ACCOUNT = ] +[ DEFAULT_DATABASE = ] +[ DEFAULT_ENGINE = ] +[ ROLE = [,...] ] +] +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the user, may contain non-alpha-numeric characters such as exclamation points (!), percent signs (%), dots (.), underscores (\_), dashes (-), and asterisks (\*). Strings containing non-alphanumeric characters must be enclosed in single or double quotes. For more information about the full set of naming rules, see the [object identifiers guide](/Reference/object-identifiers.html#user-names). `` (Optional) Specifies the name of the login to link the user with. This cannot be used in conjunction with the `SERVICE_ACCOUNT` parameter because a user can be linked to either a login OR a service account but not both. `` (Optional) Specifies the name of the service account to link the user with. The `` parameter cannot be used in conjunction with the `LOGIN_NAME` parameter because a user can be linked to a login OR a service account but not both. `` (Optional) Defines the default database for the user. `` (Optional) Defines the default engine for the user. 
`[, ...]` (Optional) Defines a role for the user. Additional roles can be granted after the user is created. When assigning multiple roles, enclose the list of roles in parentheses. If no role is specified, the user is not granted any roles.
+
+## [](#example)Example
+
+The following code example creates a user named `alex`, links it to the login `alexs@acme.com`, and assigns it the roles of `analyst` and `data_engineer`:
+
+```
+CREATE USER alex WITH LOGIN= "alexs@acme.com" ROLE= (analyst, data_engineer);
+```
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_login.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_login.md
new file mode 100644
index 0000000..4cb1413
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_login.md
@@ -0,0 +1,25 @@
+# [](#drop-login)DROP LOGIN
+
+Deletes a login.
+
+For more information, see [Managing logins](/Guides/managing-your-organization/managing-logins.html).
+
+## [](#syntax)Syntax
+
+```
+DROP LOGIN [ IF EXISTS ] ;
+```
+
+If the login is linked to a user, it can not be dropped. In order to drop a login linked to a user, the link must be reset `alter user foo set login="new-login@acme.com"|DEFAULT` or the user dropped.
+
+## [](#parameters)Parameters
+
+Parameter Description `` The name of the login to delete.
+
+## [](#example)Example
+
+The following command will delete the “alexs@acme.com” login.
+
+```
+DROP LOGIN "alexs@acme.com";
+```
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_network_policy.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_network_policy.md
new file mode 100644
index 0000000..104c2fc
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_network_policy.md
@@ -0,0 +1,25 @@
+# [](#drop-network-policy)DROP NETWORK POLICY
+
+Deletes a network policy.
+
+For more information, see [Network policies](/Guides/security/network-policies.html).
+
+## [](#syntax)Syntax
+
+```
+DROP NETWORK POLICY [ RESTRICT | CASCADE ]
+```
+
+## [](#parameters)Parameters
+
+Parameter Description `` The name of the network policy to delete. `RESTRICT` or `CASCADE` An optional parameter to specify deletion mode.
+RESTRICT mode prevents dropping the network policy if there is any login, service account or organization linked. RESTRICT mode is used by default.
+CASCADE mode automatically drops the network policy and all its links to other objects.
+
+## [](#example)Example
+
+The following command will delete “my\_network\_policy”.
+
+```
+DROP NETWORK POLICY my_network_policy
+```
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_role.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_role.md
new file mode 100644
index 0000000..8cf36b9
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_role.md
@@ -0,0 +1,35 @@
+# [](#drop-role)DROP ROLE
+
+Deletes a role. Note that a role cannot be dropped if there are permissions granted to it; in this case, an error message will be displayed, and you need to manually drop the permissions granted to the role and retry.
+
+For more information, see [Role-based access control](/Guides/security/rbac.html).
+ +A role cannot be dropped if there are permissions granted to the role. In this case, an error message will be displayed, and you need to manually drop the permissions granted to the role and retry. + +## [](#syntax)Syntax + +``` +DROP ROLE [ IF EXISTS ] +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the role. + +## [](#example)Example + +The following command will delete the role “user\_role” + +``` +DROP ROLE user_role; +``` + +### [](#example-2)Example 2 + +The following command will delete the role “my\_role\_2” + +``` +DROP ROLE IF EXISTS my_role_2 +``` + +If “my\_role\_2” does not exist, no error message is thrown. \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_service_account.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_service_account.md new file mode 100644 index 0000000..ffaef32 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_service_account.md @@ -0,0 +1,25 @@ +# [](#drop-service-account)DROP SERVICE ACCOUNT + +Deletes a service account. + +For more information, see [Service accounts](/Guides/managing-your-organization/service-accounts.html). + +## [](#syntax)Syntax + +``` +DROP SERVICE ACCOUNT ; +``` + +If the service account is linked to a user, it can not be dropped. In order to drop a service account linked to a user, the link must be reset `alter user foo set service_account=new_service_account|DEFAULT` or the user dropped. + +## [](#parameters)Parameters + +Parameter Description `` The name of the service account to delete. + +## [](#example)Example + +The following command will delete the “sa1” service account. 
+ +``` +DROP SERVICE ACCOUNT "sa1"; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_user.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_user.md new file mode 100644 index 0000000..dcd0438 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_drop_user.md @@ -0,0 +1,27 @@ +# [](#drop-user)DROP USER + +Deletes a user. + +For more information, see [Managing users](/Guides/managing-your-organization/managing-users.html). + +A user cannot be dropped if it owns objects. In this case, an error message will be displayed, and you need to manually drop the objects, or transfer ownership. + +for more information, see [Ownership](/Guides/security/ownership.html). + +## [](#syntax)Syntax + +``` +DROP USER [ IF EXISTS ] ; +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the user to delete. If the user name contains spaces or non-alphanumeric characters, it must be enclosed in single or double quotes. + +## [](#example)Example + +The following command will delete the “alex” user. + +``` +DROP USER alex; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_grant.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_grant.md new file mode 100644 index 0000000..0df2966 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_grant.md @@ -0,0 +1,103 @@ +# [](#grant)GRANT + +Grants permission or assignment to a role. `GRANT` can also be used to assign a role to another role or a user. + +For more information, see [Role-based access control](/Overview/Security/Role-Based%20Access%20Control/role-management/). + +## [](#grant-privilege)GRANT PRIVILEGE + +Grants a permission to a role. + +Only an account\_admin or a role owner can grant a permission to a role. 
To grant a permission, you must first have that permission granted to you.
+
+### [](#syntax)Syntax
+
+```
+GRANT ON [IN ] TO 
+```
+
+### [](#parameters)Parameters
+
+Parameter Description `` The name of the permission to grant to a role. Available permissions vary depending on the object that they apply to. For a full list, see [Permissions](/Overview/Security/Role-Based%20Access%20Control/role-management/). `` The type of object to grant permissions on. `` The name of the object to grant permissions on. `` The name of the role to grant the permission to.
+
+### [](#examples)Examples
+
+**Grant `USAGE` on a single database**
+
+The following code example grants the `USAGE` privilege on the `db` database to the role `user_role`, allowing it to access the database:
+
+```
+GRANT USAGE ON DATABASE db TO user_role;
+```
+
+**Grant `USAGE` on all databases within an account**
+
+The following code example grants the `USAGE` privilege on all databases in the `dev` account to the role `user_role`, allowing access to them:
+
+```
+GRANT USAGE ANY DATABASE ON ACCOUNT dev TO user_role;
+```
+
+**Grant access to a database, schema, and a specific table**
+
+The following code example grants the role `user_role` access to the `db` database, the `public` schema within the `db` database, and permission to read data from the `my_table` table in the `public` schema:
+
+```
+GRANT USAGE ON DATABASE db TO user_role;
+GRANT USAGE ON SCHEMA public IN DATABASE db TO user_role;
+USE DATABASE db;
+GRANT SELECT ON TABLE my_table IN SCHEMA public TO user_role;
+```
+
+**Grant access to a database, schema, and all operations on a specific table**
+
+The following code example grants the role `user_role` access to the `db` database, the `public` schema within the `db` database, and all permissions on the `my_table` table in the `public` schema:
+
+```
+GRANT USAGE ON DATABASE db TO user_role;
+GRANT USAGE ON SCHEMA public IN DATABASE db TO user_role;
+USE DATABASE db;
+GRANT ALL ON TABLE 
my_table IN SCHEMA public TO user_role; +``` + +**Grant access to all existing and future tables or views in a schema** + +The following code example grants `user_role` access to the `db` database, the `public` schema within the `db` database, and permission to query all existing and future tables or views in the `public` schema: + +``` +GRANT USAGE ON DATABASE db TO user_role; +GRANT USAGE ON SCHEMA public IN DATABASE db TO user_role; +GRANT SELECT ANY ON SCHEMA public IN DATABASE db TO user_role; +``` + +## [](#grant-role)GRANT ROLE + +Grants a role to either a user or another role, allowing the recipient to inherit the permissions associated with the granted role. + +### [](#syntax-1)Syntax + +``` +GRANT ROLE TO { USER | ROLE } +``` + +### [](#parameters-1)Parameters + +Parameter Description `` The name of the role to grant. `` The name of the user to grant `` to. `` The name of the role to assign the role to. + +### [](#examples-1)Examples + +**Grant a role to another role** + +The following code example assigns the `role_name` role to `role_name_2`, allowing `role_name_2` to inherit all the permissions granted to `role_name`: + +``` +GRANT ROLE role_name TO ROLE role_name_2; +``` + +**Grant a role to a user** + +The following code example assigns the `role_name` role to `user_name`, allowing the user to inherit all the permissions granted to `role_name`: + +``` +GRANT ROLE role_name TO USER user_name; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_revoke.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_revoke.md new file mode 100644 index 0000000..8650ea0 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_access_control_revoke.md @@ -0,0 +1,96 @@ +# [](#revoke)REVOKE + +Revokes permissions from a role. `REVOKE` can also be used to revoke a role from another role or a user. 
+
+For more information, see [Role-based access control](/Guides/security/rbac.html).
+
+## [](#revoke-privilege)REVOKE PRIVILEGE
+
+Revokes a permission from a role.
+
+Only account\_admin or a role owner can revoke a permission from a role.
+
+### [](#syntax)Syntax
+
+```
+REVOKE ON [IN ] FROM 
+```
+
+### [](#parameters)Parameters
+
+Parameter Description `` The name of the permission to revoke from a role. Permissions that can be revoked vary depending on the object that they apply to. For a full list, see [Permissions](/Overview/Security/Role-Based%20Access%20Control/). `` The type of the object to revoke permissions from. `` The name of the object to revoke permissions from. `` The name of the role from which the permission will be revoked.
+
+### [](#examples)Examples
+
+**Revoke `MODIFY` permission on a database**
+
+The following code example revokes the `MODIFY` permission on the `db` database from the role `user_role`, preventing it from making changes to the database:
+
+```
+REVOKE MODIFY ON DATABASE db FROM user_role;
+```
+
+**Revoke all permissions on a database**
+
+The following code example revokes all permissions on the `db` database from the role `user_role`, preventing all operations on it:
+
+```
+REVOKE ALL ON DATABASE db FROM user_role;
+```
+
+**Revoke `USAGE` permissions on all databases in an account**
+
+The following code example revokes `USAGE` permissions on all databases in the `dev` account from the role `user_role`, preventing it from accessing metadata or using those databases:
+
+```
+REVOKE USAGE ANY DATABASE ON ACCOUNT dev FROM user_role;
+```
+
+**Revoke `SELECT` permission on a specific table**
+
+The following code example sets the active database to `db` and revokes `user_role`’s permission to read data from the `my_table` table in the `public` schema:
+
+```
+USE DATABASE db;
+REVOKE SELECT ON TABLE my_table IN SCHEMA public FROM user_role;
+```
+
+**Revoke `SELECT` permission on all tables in a schema**
+
+The following 
code revokes `user_role`’s permission to read data from all existing and future tables in the `public` schema of the `db` database:
+
+```
+REVOKE SELECT ANY ON SCHEMA public IN DATABASE db FROM user_role;
+```
+
+## [](#revoke-role)REVOKE ROLE
+
+Revokes a role from a user or from another role.
+
+### [](#syntax-1)Syntax
+
+```
+REVOKE ROLE FROM { USER | ROLE } 
+```
+
+## [](#parameters-1)Parameters
+
+Parameter Description `` The name of the role to revoke. `` The name of the user from which to revoke the `` role. `` The name of the role from which to revoke the role.
+
+### [](#examples-1)Examples
+
+**Revoke a role from another role**
+
+The following code example removes the `role_name` role from `role_name_2`, revoking access to permissions granted to `role_name`:
+
+```
+REVOKE ROLE role_name FROM ROLE role_name_2;
+```
+
+**Revoke a role from a user**
+
+The following command revokes role `role_name` from user `user_name`, removing the user’s access to the permissions granted to `role_name`:
+
+```
+REVOKE ROLE role_name FROM USER user_name;
+```
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition.md
new file mode 100644
index 0000000..df412c2
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition.md
@@ -0,0 +1,26 @@
+## [](#data-definition)Data definition
+
+* * *
+
+- [ALTER ACCOUNT](/sql_reference/commands/data-definition/alter-account.html)
+- [ALTER DATABASE](/sql_reference/commands/data-definition/alter-database.html)
+- [ALTER ORGANIZATION](/sql_reference/commands/data-definition/alter-organization.html)
+- [ALTER SCHEMA](/sql_reference/commands/data-definition/alter-schema.html)
+- [ALTER TABLE](/sql_reference/commands/data-definition/alter-table.html)
+- [ALTER VIEW](/sql_reference/commands/data-definition/alter-view.html)
+- [CREATE 
ACCOUNT](/sql_reference/commands/data-definition/create-account.html)
+- [CREATE AGGREGATING INDEX](/sql_reference/commands/data-definition/create-aggregating-index.html)
+- [CREATE DATABASE](/sql_reference/commands/data-definition/create-database.html)
+- [CREATE EXTERNAL TABLE](/sql_reference/commands/data-definition/create-external-table.html)
+- [CREATE LOCATION](/sql_reference/commands/data-definition/create-location.html)
+- [CREATE TABLE](/sql_reference/commands/data-definition/create-fact-dimension-table.html)
+- [CREATE TABLE AS SELECT (CTAS)](/sql_reference/commands/data-definition/create-fact-dimension-table-as-select.html)
+- [CREATE TABLE CLONE](/sql_reference/commands/data-definition/create-table-clone.html)
+- [CREATE VIEW](/sql_reference/commands/data-definition/create-view.html)
+- [DROP ACCOUNT](/sql_reference/commands/data-definition/drop-account.html)
+- [DROP DATABASE](/sql_reference/commands/data-definition/drop-database.html)
+- [DROP INDEX](/sql_reference/commands/data-definition/drop-index.html)
+- [DROP LOCATION](/sql_reference/commands/data-definition/drop-location.html)
+- [DROP TABLE](/sql_reference/commands/data-definition/drop-table.html)
+- [DROP VIEW](/sql_reference/commands/data-definition/drop-view.html)
+- [USE DATABASE](/sql_reference/commands/data-definition/use-database.html)
\ No newline at end of file
diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_account.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_account.md
new file mode 100644
index 0000000..1e60ea2
--- /dev/null
+++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_account.md
@@ -0,0 +1,23 @@
+# [](#alter-account)ALTER ACCOUNT
+
+Updates the configuration of the specified account ``.
+
+For more information, see [Managing accounts](/Guides/managing-your-organization/managing-accounts.html).
+ +## [](#syntax)Syntax + +``` +ALTER ACCOUNT RENAME TO ; +``` + +## [](#parameters)Parameters + +Parameter Description `` The name of the account to be altered. `` The new name for the account. The account name must start and end with an alphabetic character and cannot contain spaces or special characters except for hyphens (-). + +## [](#example)Example + +The following command will rename the “dev” account to “staging”. + +``` +ALTER ACCOUNT dev RENAME TO staging; +``` \ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_database.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_database.md new file mode 100644 index 0000000..e775633 --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_database.md @@ -0,0 +1,40 @@ +# [](#alter-database)ALTER DATABASE + +Updates the configuration of the specified database. + +## [](#alter-database-description)ALTER DATABASE DESCRIPTION + +### [](#syntax)Syntax + +``` +ALTER DATABASE WITH + [DESCRIPTION = ] +``` + +### [](#parameters)Parameters + +Parameter Description `` The name of the database to be altered. `` The description of the database. + +### [](#example)Example + +The following example alters a description of the database: + +``` +ALTER DATABASE my_database WITH DESCRIPTION = 'Database for query management'; +``` + +## [](#alter-database-owner-to)ALTER DATABASE OWNER TO + +Change the owner of a database. The current owner of a database can be viewed in the [information\_schema.catalogs](/sql_reference/information-schema/catalogs.html) view on `catalog_owner` column. + +check [ownership](/Guides/security/ownership.html) page for more info. + +### [](#syntax-1)Syntax + +``` +ALTER DATABASE OWNER TO +``` + +### [](#parameters-1)Parameters + +Parameter Description `` The name of the database to change the owner of. `` The new owner of the database. 
\ No newline at end of file diff --git a/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_organization.md b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_organization.md new file mode 100644 index 0000000..3aadd0b --- /dev/null +++ b/cmd/docs-scrapper/fireboltdocs/sql_reference_commands_data_definition_alter_organization.md @@ -0,0 +1,42 @@ +# [](#alter-organization)ALTER ORGANIZATION + +Updates the specified organization to manage Single Sign-On configuration. + +For more information, see [Configure SSO](/Guides/security/sso/). + +## [](#syntax)Syntax + +``` +ALTER ORGANIZATION SET + [ SSO = '{ + “signOnUrl”: “”, + “signOutUrl”: “”, + “issuer”: “”, + “provider”: “”, + “label”: “