Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Jenkins job to backfill user properties on Amplitude. #1664

Merged
merged 1 commit into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package analytics

import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization
import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm


class AmplitudeUserPropertiesBackfill {
    // Defines the "amplitude-user-properties-backfill" Jenkins job. The job checks
    // out the analytics-tools repo, binds Vault credentials, and runs a shell step
    // (dataeng/resources/amplitude-properties-backfill.sh) that backfills user
    // properties on Amplitude from a Snowflake source table.
    public static def job = { dslFactory, allVars ->
        dslFactory.job("amplitude-user-properties-backfill") {
            logRotator common_log_rotator(allVars)
            authorization common_authorization(allVars)
            parameters secure_scm_parameters(allVars)
            // Build parameters: which repo/branch to run from, and which
            // table/columns to push to Amplitude. Defaults come from allVars.
            parameters {
                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
                stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.')
                stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to use.')
                stringParam('AMPLITUDE_DATA_SOURCE_TABLE', '', 'Table name that has data which needs to be updated on Amplitude. It should have format like database.schema.table.')
                stringParam('COLUMNS_TO_UPDATE', '', 'Columns that you want to update. Separate multiple columns with commas.')
                stringParam('RESPONSE_TABLE', '', 'Output table which will store the updated data along with response from API endpoint.')
                stringParam('AMPLITUDE_OPERATION_NAME', '', 'Amplitude user property operation name. e.g: set or setOnce.')
            }
            // Snowflake connection settings and the Vault location of the
            // Amplitude API key, exported into the build environment for the
            // shell step below.
            environmentVariables {
                env('KEY_PATH', allVars.get('KEY_PATH'))
                env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH'))
                env('USER', allVars.get('USER'))
                env('ACCOUNT', allVars.get('ACCOUNT'))
                env('AMPLITUDE_VAULT_KV_PATH', allVars.get('AMPLITUDE_VAULT_KV_PATH'))
                env('AMPLITUDE_VAULT_KV_VERSION', allVars.get('AMPLITUDE_VAULT_KV_VERSION'))
            }
            multiscm secure_scm(allVars) << {
                git {
                    remote {
                        url('$ANALYTICS_TOOLS_URL')
                        branch('$ANALYTICS_TOOLS_BRANCH')
                        // NOTE(review): hard-coded credential ID '1' — confirm this
                        // matches the intended Jenkins credentials entry; sibling
                        // jobs may use a named ID instead.
                        credentials('1')
                    }
                    extensions {
                        relativeTargetDirectory('analytics-tools')
                        pruneBranches()
                        cleanAfterCheckout()
                    }
                }
            }
            wrappers {
                timestamps()
                // Binds the Vault AppRole role-id/secret-id pair used by the shell
                // step to log in and fetch the Amplitude API key.
                credentialsBinding {
                    usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault');
                }
            }
            publishers common_publishers(allVars)
            steps {
                shell(dslFactory.readFileFromWorkspace('dataeng/resources/amplitude-properties-backfill.sh'))
            }
        }
    }
}
2 changes: 2 additions & 0 deletions dataeng/jobs/createJobsNew.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import static analytics.RetirementJobEdxTriggers.job as RetirementJobEdxTriggers
import static analytics.RetirementJobs.job as RetirementJobsJob
import static analytics.SnowflakeCollectMetrics.job as SnowflakeCollectMetricsJob
import static analytics.SnowflakeExpirePasswords.job as SnowflakeExpirePasswordsJob
import static analytics.AmplitudeUserPropertiesBackfill.job as AmplitudeUserPropertiesBackfillJob
import static analytics.SnowflakeSchemaBuilder.job as SnowflakeSchemaBuilderJob
import static analytics.SnowflakeUserRetirementStatusCleanup.job as SnowflakeUserRetirementStatusCleanupJob
import static analytics.PrefectFlowsDeployment.job as PrefectFlowsDeploymentJob
Expand Down Expand Up @@ -50,6 +51,7 @@ def taskMap = [
RETIREMENT_JOBS_JOB: RetirementJobsJob,
SNOWFLAKE_COLLECT_METRICS_JOB: SnowflakeCollectMetricsJob,
SNOWFLAKE_EXPIRE_PASSWORDS_JOB: SnowflakeExpirePasswordsJob,
AMPLITUDE_USER_PROPERTIES_BACKFILL_JOB: AmplitudeUserPropertiesBackfillJob,
SNOWFLAKE_SCHEMA_BUILDER_JOB: SnowflakeSchemaBuilderJob,
SNOWFLAKE_USER_RETIREMENT_STATUS_CLEANUP_JOB: SnowflakeUserRetirementStatusCleanupJob,
PREFECT_FLOWS_DEPLOYMENT_JOB: PrefectFlowsDeploymentJob,
Expand Down
40 changes: 40 additions & 0 deletions dataeng/resources/amplitude-properties-backfill.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash
# Backfills user properties on Amplitude: installs the analytics-tools
# requirements, logs into Vault with the Jenkins AppRole, fetches the Amplitude
# API key, and runs amplitude_user_properties_update.py against the configured
# Snowflake source table.
#
# Expects the Jenkins job to export: PYTHON_VENV_VERSION, WORKSPACE,
# ANALYTICS_VAULT_ROLE_ID, ANALYTICS_VAULT_SECRET_ID, AMPLITUDE_VAULT_KV_VERSION,
# AMPLITUDE_VAULT_KV_PATH, KEY_PATH, PASSPHRASE_PATH, USER, ACCOUNT,
# AMPLITUDE_DATA_SOURCE_TABLE, COLUMNS_TO_UPDATE, RESPONSE_TABLE,
# AMPLITUDE_OPERATION_NAME.
set -ex

# Creating Python virtual env. All expansions are quoted to survive values
# containing spaces or glob characters.
PYTHON_VENV="python_venv"
virtualenv --python="${PYTHON_VENV_VERSION}" --clear "${PYTHON_VENV}"
source "${PYTHON_VENV}/bin/activate"

# Setup: install the script's Python dependencies.
cd "${WORKSPACE}/analytics-tools/snowflake"
make requirements

# Do not print commands from here on since they may contain secrets.
set +x

# Retrieve a vault token corresponding to the jenkins AppRole. The token is then stored in the VAULT_TOKEN variable
# which is implicitly used by subsequent vault commands within this script.
# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid
export VAULT_TOKEN=$(vault write -field=token auth/approle/login \
    role_id="${ANALYTICS_VAULT_ROLE_ID}" \
    secret_id="${ANALYTICS_VAULT_SECRET_ID}"
)

# Fetch the Amplitude API key from the configured Vault KV path/version.
API_KEY=$(
    vault kv get \
        -version="${AMPLITUDE_VAULT_KV_VERSION}" \
        -field=API_KEY \
        "${AMPLITUDE_VAULT_KV_PATH}" \
)

# NOTE(review): the API key is passed as a command-line argument, so it is
# visible in the process list on the build agent while the script runs;
# consider passing it via an environment variable or a file instead.
python amplitude_user_properties_update.py \
    --key_path "${KEY_PATH}" \
    --passphrase_path "${PASSPHRASE_PATH}" \
    --automation_user "${USER}" \
    --account "${ACCOUNT}" \
    --amplitude_data_source_table "${AMPLITUDE_DATA_SOURCE_TABLE}" \
    --columns_to_update "${COLUMNS_TO_UPDATE}" \
    --response_table "${RESPONSE_TABLE}" \
    --amplitude_operation_name "${AMPLITUDE_OPERATION_NAME}" \
    --amplitude_api_key "${API_KEY}"
Loading