Skip to content

Commit

Permalink
feat: Jenkins instance to backfill user properties on Amplitude.
Browse files Browse the repository at this point in the history
  • Loading branch information
usama101 committed Aug 29, 2023
1 parent 3fbac4a commit 14d0907
Show file tree
Hide file tree
Showing 3 changed files with 103 additions and 0 deletions.
61 changes: 61 additions & 0 deletions dataeng/jobs/analytics/AmplitudeUserPropertiesBackfill.groovy
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
package analytics

import static org.edx.jenkins.dsl.AnalyticsConstants.common_authorization
import static org.edx.jenkins.dsl.AnalyticsConstants.common_log_rotator
import static org.edx.jenkins.dsl.AnalyticsConstants.common_publishers
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm_parameters
import static org.edx.jenkins.dsl.AnalyticsConstants.secure_scm


/**
 * Job DSL definition for the "amplitude-user-properties-backfill" Jenkins job.
 *
 * The job checks out the analytics tools repository into the
 * `analytics-tools` directory of the workspace and runs
 * `dataeng/resources/amplitude-properties-backfill.sh`.
 *
 * `job` is a closure taking:
 *   dslFactory - the Job DSL factory used to create the job and to read the
 *                shell script from the seed workspace.
 *   allVars    - map of configuration values used for parameter defaults and
 *                environment variables.
 */
class AmplitudeUserPropertiesBackfill {
    public static def job = { dslFactory, allVars ->
        dslFactory.job("amplitude-user-properties-backfill") {
            logRotator common_log_rotator(allVars)
            authorization common_authorization(allVars)
            // Shared SCM parameters first, then the job-specific ones below.
            parameters secure_scm_parameters(allVars)
            parameters {
                stringParam('ANALYTICS_TOOLS_URL', allVars.get('ANALYTICS_TOOLS_URL'), 'URL for the analytics tools repo.')
                stringParam('ANALYTICS_TOOLS_BRANCH', allVars.get('ANALYTICS_TOOLS_BRANCH'), 'Branch of analytics tools repo to use.')
                stringParam('NOTIFY', allVars.get('NOTIFY','$PAGER_NOTIFY'), 'Space separated list of emails to send notifications to.')
                stringParam('PYTHON_VENV_VERSION', 'python3.7', 'Python virtual environment version to use.')
                // NOTE(review): ENVIRONMENT is not referenced by amplitude-properties-backfill.sh;
                // confirm whether something downstream reads it.
                stringParam('ENVIRONMENT', '', 'edx environment which contains Amplitude credentials.')
                stringParam('AMPLITUDE_DATA_SOURCE_TABLE', '', 'Table name that has data which needs to be updated on Amplitude. It should have format like database.schema.table.')
                stringParam('COLUMNS_TO_UPDATE', '', 'Columns that you want to update. Separate multiple columns with commas.')
                stringParam('RESPONSE_TABLE', '', 'Output table which will store the updated data along with response from API endpoint.')
                stringParam('AMPLITUDE_OPERATION_NAME', '', 'Amplitude user property operation name. e.g: set or setOnce.')
            }
            // Values taken from allVars and exposed to the shell step as environment variables.
            environmentVariables {
                env('KEY_PATH', allVars.get('KEY_PATH'))
                env('PASSPHRASE_PATH', allVars.get('PASSPHRASE_PATH'))
                env('USER', allVars.get('USER'))
                env('ACCOUNT', allVars.get('ACCOUNT'))
                env('AMPLITUDE_VAULT_KV_PATH', allVars.get('AMPLITUDE_VAULT_KV_PATH'))
                env('AMPLITUDE_VAULT_KV_VERSION', allVars.get('AMPLITUDE_VAULT_KV_VERSION'))
            }
            // Append the analytics tools checkout to the shared secure SCM configuration.
            multiscm secure_scm(allVars) << {
                git {
                    remote {
                        // Single-quoted on purpose: these are Jenkins build parameters
                        // resolved at build time, not Groovy interpolations.
                        url('$ANALYTICS_TOOLS_URL')
                        branch('$ANALYTICS_TOOLS_BRANCH')
                        credentials('1')
                    }
                    extensions {
                        relativeTargetDirectory('analytics-tools')
                        pruneBranches()
                        cleanAfterCheckout()
                    }
                }
            }
            wrappers {
                timestamps()
                // Binds the 'analytics-vault' credential to the two variables the
                // shell script uses for its Vault AppRole login.
                credentialsBinding {
                    usernamePassword('ANALYTICS_VAULT_ROLE_ID', 'ANALYTICS_VAULT_SECRET_ID', 'analytics-vault')
                }
            }
            publishers common_publishers(allVars)
            steps {
                shell(dslFactory.readFileFromWorkspace('dataeng/resources/amplitude-properties-backfill.sh'))
            }
        }
    }
}
2 changes: 2 additions & 0 deletions dataeng/jobs/createJobsNew.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import static analytics.RetirementJobEdxTriggers.job as RetirementJobEdxTriggers
import static analytics.RetirementJobs.job as RetirementJobsJob
import static analytics.SnowflakeCollectMetrics.job as SnowflakeCollectMetricsJob
import static analytics.SnowflakeExpirePasswords.job as SnowflakeExpirePasswordsJob
import static analytics.AmplitudeUserPropertiesBackfill.job as AmplitudeUserPropertiesBackfillJob
import static analytics.SnowflakeSchemaBuilder.job as SnowflakeSchemaBuilderJob
import static analytics.SnowflakeUserRetirementStatusCleanup.job as SnowflakeUserRetirementStatusCleanupJob
import static analytics.PrefectFlowsDeployment.job as PrefectFlowsDeploymentJob
Expand Down Expand Up @@ -50,6 +51,7 @@ def taskMap = [
RETIREMENT_JOBS_JOB: RetirementJobsJob,
SNOWFLAKE_COLLECT_METRICS_JOB: SnowflakeCollectMetricsJob,
SNOWFLAKE_EXPIRE_PASSWORDS_JOB: SnowflakeExpirePasswordsJob,
AMPLITUDE_USER_PROPERTIES_BACKFILL_JOB: AmplitudeUserPropertiesBackfillJob,
SNOWFLAKE_SCHEMA_BUILDER_JOB: SnowflakeSchemaBuilderJob,
SNOWFLAKE_USER_RETIREMENT_STATUS_CLEANUP_JOB: SnowflakeUserRetirementStatusCleanupJob,
PREFECT_FLOWS_DEPLOYMENT_JOB: PrefectFlowsDeploymentJob,
Expand Down
40 changes: 40 additions & 0 deletions dataeng/resources/amplitude-properties-backfill.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env bash
# Runs amplitude_user_properties_update.py from the analytics-tools checkout.
#
# Expects these variables in the environment:
#   WORKSPACE, PYTHON_VENV_VERSION,
#   ANALYTICS_VAULT_ROLE_ID, ANALYTICS_VAULT_SECRET_ID,
#   AMPLITUDE_VAULT_KV_PATH, AMPLITUDE_VAULT_KV_VERSION,
#   KEY_PATH, PASSPHRASE_PATH, USER, ACCOUNT,
#   AMPLITUDE_DATA_SOURCE_TABLE, COLUMNS_TO_UPDATE, RESPONSE_TABLE,
#   AMPLITUDE_OPERATION_NAME
set -ex

# Creating Python virtual env
PYTHON_VENV="python_venv"
virtualenv --python="${PYTHON_VENV_VERSION}" --clear "${PYTHON_VENV}"
source "${PYTHON_VENV}/bin/activate"

# Setup
cd "${WORKSPACE}/analytics-tools/snowflake"
make requirements

# Do not print commands from here on since they may contain secrets.
set +x

# Retrieve a vault token corresponding to the jenkins AppRole. The token is then stored in the VAULT_TOKEN variable
# which is implicitly used by subsequent vault commands within this script.
# Instructions followed: https://learn.hashicorp.com/tutorials/vault/approle#step-4-login-with-roleid-secretid
#
# The assignment is kept separate from `export`: `export VAR=$(cmd)` returns the
# exit status of `export`, which would hide a failed login from `set -e`.
VAULT_TOKEN=$(vault write -field=token auth/approle/login \
    role_id="${ANALYTICS_VAULT_ROLE_ID}" \
    secret_id="${ANALYTICS_VAULT_SECRET_ID}"
)
export VAULT_TOKEN

API_KEY=$(
    vault kv get \
        -version="${AMPLITUDE_VAULT_KV_VERSION}" \
        -field=api_key \
        "${AMPLITUDE_VAULT_KV_PATH}"
)

# All expansions are quoted so that values containing spaces (e.g. a column
# list typed as "a, b") or glob characters reach the script as single arguments.
# NOTE(review): the API key is passed on the command line and is therefore
# visible in the process list; consider an environment variable if the
# Python script supports one.
python amplitude_user_properties_update.py \
    --key_path "${KEY_PATH}" \
    --passphrase_path "${PASSPHRASE_PATH}" \
    --automation_user "${USER}" \
    --account "${ACCOUNT}" \
    --amplitude_data_source_table "${AMPLITUDE_DATA_SOURCE_TABLE}" \
    --columns_to_update "${COLUMNS_TO_UPDATE}" \
    --response_table "${RESPONSE_TABLE}" \
    --amplitude_operation_name "${AMPLITUDE_OPERATION_NAME}" \
    --api_key "${API_KEY}"

0 comments on commit 14d0907

Please sign in to comment.