Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parent email to new contact rollups process #34541

Merged
merged 9 commits into from May 5, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions dashboard/app/models/contact_rollups_processed.rb
Expand Up @@ -38,7 +38,7 @@ def self.import_from_raw_table(batch_size = DEFAULT_BATCH_SIZE)
# Because GROUP_CONCAT returns a string, we add a parser function to convert the result to a hash.
group_by_query = <<-SQL.squish
SELECT email, CONCAT('[', GROUP_CONCAT(data_and_metadata), ']') AS all_data_and_metadata
FROM (#{select_query}) AS sub_query
FROM (#{select_query}) AS subquery
hacodeorg marked this conversation as resolved.
Show resolved Hide resolved
GROUP BY email
SQL

Expand All @@ -48,7 +48,7 @@ def self.import_from_raw_table(batch_size = DEFAULT_BATCH_SIZE)
contact_data = parse_contact_data(contact['all_data_and_metadata'])

processed_contact_data = {}
processed_contact_data.merge!(extract_field(contact_data, 'dashboard.email_preferences', 'opt_in') || {})
processed_contact_data.merge!(extract_field(contact_data, "#{CDO.dashboard_db_name}.email_preferences", 'opt_in') || {})
processed_contact_data.merge!(extract_updated_at(contact_data) || {})

batch << {email: contact['email'], data: processed_contact_data}
Expand Down
48 changes: 42 additions & 6 deletions dashboard/app/models/contact_rollups_raw.rb
Expand Up @@ -23,25 +23,44 @@ def self.truncate_table
end

def self.extract_email_preferences
query = extract_from_source_query('email_preferences', ['opt_in'], 'email')
query = get_extraction_query('email_preferences', false, ['opt_in'], 'email')
ActiveRecord::Base.connection.execute(query)
end

# @param source [String] Source from which we want to extract data
# Copies parent emails from the users table into the raw contact rollups table.
# Users sharing a parent_email are collapsed by the subquery into a single row that
# carries the most recent updated_at. No data columns are extracted (empty column list);
# the rows are tagged with the "<dashboard db>.users.parent_email" source name.
# @return the result of executing the generated INSERT statement
def self.extract_parent_emails
  parent_emails_subquery = <<~SQL
    SELECT parent_email, MAX(updated_at) AS updated_at
    FROM users
    GROUP BY parent_email
  SQL

  insert_statement = get_extraction_query(
    parent_emails_subquery,
    true,
    [],
    'parent_email',
    "#{CDO.dashboard_db_name}.users.parent_email"
  )
  ActiveRecord::Base.connection.execute(insert_statement)
end

# @param source [String] Source from which we want to extract data (can be a dashboard table name, or subquery)
# @param source_is_subquery [Boolean] True if source is a subquery, rather than a table name
# @param data_columns [Array] Columns we want reshaped into a single JSON object
# @param email_column [String] Column in source table we want to insert into the email column
# @param source_name [String] Name for source (should be non-nil if using a subquery or non-dashboard table)
# @return [String] A SQL statement to extract and reshape data from the source table.
def self.extract_from_source_query(source, data_columns, email_column)
def self.get_extraction_query(source, source_is_subquery, data_columns, email_column, source_name=nil)
if source_name.nil? && source_is_subquery
raise "Source name required if source is a subquery"
end

wrapped_source, sources_column = format_source(source, source_is_subquery, source_name)
bencodeorg marked this conversation as resolved.
Show resolved Hide resolved

<<~SQL
INSERT INTO #{ContactRollupsRaw.table_name} (email, sources, data, data_updated_at, created_at, updated_at)
SELECT
#{email_column},
'dashboard.#{source}' AS sources,
'#{sources_column}' AS sources,
bencodeorg marked this conversation as resolved.
Show resolved Hide resolved
#{create_json_object(data_columns)} AS data,
#{source}.updated_at AS data_updated_at,
updated_at AS data_updated_at,
NOW() AS created_at,
NOW() AS updated_at
FROM #{source}
FROM #{wrapped_source}
WHERE #{email_column} IS NOT NULL AND #{email_column} != ''
SQL
end
Expand All @@ -58,4 +77,21 @@ def self.create_json_object(columns)

'JSON_OBJECT(' + columns.map {|col| "'#{col}',#{col}"}.join(',') + ')'
end

# Builds the two source-dependent pieces of an extraction query.
#
# @param source [String] A dashboard table name, or the SQL text of a subquery
# @param source_is_subquery [Boolean] True if source is a subquery rather than a table name
# @param source_name [String, nil] Value for the "sources" column; only used for subqueries
# @return [Array<String>] A pair of:
#   1. the SQL fragment to place after FROM
#   2. the value to store in the "sources" column
# @example Table name (source name is derived from the dashboard DB name, e.g. "dashboard")
#   Input:  ['email_preferences', false, nil]
#   Output: ['email_preferences', 'dashboard.email_preferences']
# @example Subquery (source name is passed through unchanged)
#   Input:  ['SELECT DISTINCT parent_email FROM users', true, 'dashboard.users.parent_email']
#   Output: ['(SELECT DISTINCT parent_email FROM users) AS subquery', 'dashboard.users.parent_email']
def self.format_source(source, source_is_subquery, source_name)
  # A subquery must be parenthesized and aliased to be usable in a FROM clause.
  return ["(#{source}) AS subquery", source_name] if source_is_subquery

  [source, "#{CDO.dashboard_db_name}.#{source}"]
end
end
4 changes: 4 additions & 0 deletions dashboard/lib/contact_rollups_v2.rb
Expand Up @@ -9,6 +9,10 @@ def self.build_contact_rollups(log_collector, sync_with_pardot=false)
ContactRollupsRaw.extract_email_preferences
end

log_collector.time!('Extracts parent emails from dashboard users') do
ContactRollupsRaw.extract_parent_emails
end

log_collector.time!('Processes all extracted data') do
ContactRollupsProcessed.import_from_raw_table
end
Expand Down
6 changes: 3 additions & 3 deletions dashboard/test/models/contact_rollups_processed_test.rb
Expand Up @@ -39,7 +39,7 @@ class ContactRollupsProcessedTest < ActiveSupport::TestCase
create :contact_rollups_raw, email: email,
data: nil, data_updated_at: base_time - 1.day
create :contact_rollups_raw, email: email,
sources: 'dashboard.email_preferences', data: {opt_in: 1}, data_updated_at: base_time
sources: "#{CDO.dashboard_db_name}.email_preferences", data: {opt_in: 1}, data_updated_at: base_time

ContactRollupsProcessed.import_from_raw_table

Expand All @@ -61,13 +61,13 @@ class ContactRollupsProcessedTest < ActiveSupport::TestCase
end

test 'extract_field' do
table = 'dashboard.email_preferences'
table = "#{CDO.dashboard_db_name}.email_preferences"
field = 'opt_in'

test_cases = [
{input: [{}, nil, nil], expected_output: nil},
{input: [{table => {}}, table, field], expected_output: nil},
{input: [{'dashboard.another_table' => {opt_in: 1}}, table, field], expected_output: nil},
{input: [{"#{CDO.dashboard_db_name}.another_table" => {opt_in: 1}}, table, field], expected_output: nil},
{input: [{table => {'opt_in' => 0}}, table, field], expected_output: {opt_in: 0}},
{input: [{table => {'opt_in' => 1}}, table, field], expected_output: {opt_in: 1}},
{input: [{table => {'opt_in' => nil}}, table, field], expected_output: {opt_in: nil}}
Expand Down
59 changes: 47 additions & 12 deletions dashboard/test/models/contact_rollups_raw_test.rb
Expand Up @@ -9,59 +9,94 @@ class ContactRollupsRawTest < ActiveSupport::TestCase
expected_data = {opt_in: email_preference.opt_in ? 1 : 0}
result = ContactRollupsRaw.find_by(
email: email_preference.email,
sources: "dashboard.#{email_preference.class.table_name}"
sources: "#{CDO.dashboard_db_name}.email_preferences"
)

assert_equal expected_data, result.data.symbolize_keys
end

test 'extract_parent_email creates records as we would expect' do
bencodeorg marked this conversation as resolved.
Show resolved Hide resolved
student = create :student, parent_email: 'caring@parent.com'
ContactRollupsRaw.extract_parent_emails

# confirms that a) record exists, and b) data is blank
refute_nil ContactRollupsRaw.find_by(
email: student.parent_email,
sources: "#{CDO.dashboard_db_name}.users.parent_email",
data: nil
)
end

# Creates three email preferences, runs the extraction, and checks that the raw
# contact rollups table ends up with exactly three rows.
test 'extract_email_preferences can import many email preferences' do
  3.times {|i| create :email_preference, email: "contact_#{i}@rollups.com"}
  ContactRollupsRaw.extract_email_preferences
  # Must be assert_equal: `assert 3, count` only checks that 3 is truthy (the count
  # is treated as the failure message), so the original assertion could never fail.
  assert_equal 3, ContactRollupsRaw.count
end

test 'extract_from_source_query can import when data column is null' do
test 'get_extraction_query can import when data column is null' do
teacher = create :teacher

query = ContactRollupsRaw.extract_from_source_query('users', [], 'email')
query = ContactRollupsRaw.get_extraction_query('users', false, [], 'email')
ActiveRecord::Base.connection.execute(query)

refute_nil ContactRollupsRaw.find_by(email: teacher.email, data: nil, sources: 'dashboard.users')
refute_nil ContactRollupsRaw.find_by(email: teacher.email, data: nil, sources: "#{CDO.dashboard_db_name}.users")
end

test 'get_extraction_query can import when source is a subquery' do
first_child = create :student, parent_email: 'caring@parent.com'
second_child = create :student, parent_email: 'caring@parent.com'

# we're not actually interested in user IDs in contact rollups
# just a simple example of something we could extract in a subquery
subquery = <<~SQL
SELECT parent_email, max(updated_at) as updated_at, max(id) as higher_student_id
bencodeorg marked this conversation as resolved.
Show resolved Hide resolved
FROM users
GROUP BY 1
SQL

query = ContactRollupsRaw.get_extraction_query(subquery, true, ['higher_student_id'], 'parent_email', "#{CDO.dashboard_db_name}.users.id")
ActiveRecord::Base.connection.execute(query)

refute_empty ContactRollupsRaw.where(
"email = :email and data->'$.higher_student_id' = :higher_student_id and sources = :sources",
email: first_child.parent_email,
sources: "#{CDO.dashboard_db_name}.users.id",
higher_student_id: second_child.id
)
end

test 'extract_from_source_query looks as expected when called with a single column' do
test 'get_extraction_query looks as expected when called with a single column' do
expected_sql = <<~SQL
INSERT INTO #{ContactRollupsRaw.table_name} (email, sources, data, data_updated_at, created_at, updated_at)
SELECT
email,
'dashboard.email_preferences' AS sources,
'#{CDO.dashboard_db_name}.email_preferences' AS sources,
JSON_OBJECT('opt_in',opt_in) AS data,
email_preferences.updated_at AS data_updated_at,
updated_at AS data_updated_at,
NOW() AS created_at,
NOW() AS updated_at
FROM email_preferences
WHERE email IS NOT NULL AND email != ''
SQL

assert_equal expected_sql, ContactRollupsRaw.extract_from_source_query('email_preferences', ['opt_in'], 'email')
assert_equal expected_sql, ContactRollupsRaw.get_extraction_query('email_preferences', false, ['opt_in'], 'email')
end

test 'extract_from_source_query looks as expected when called with multiple columns' do
test 'get_extraction_query looks as expected when called with multiple columns' do
expected_sql = <<~SQL
INSERT INTO #{ContactRollupsRaw.table_name} (email, sources, data, data_updated_at, created_at, updated_at)
SELECT
parent_email,
'dashboard.users' AS sources,
'#{CDO.dashboard_db_name}.users' AS sources,
JSON_OBJECT('birthday',birthday,'gender',gender) AS data,
users.updated_at AS data_updated_at,
updated_at AS data_updated_at,
NOW() AS created_at,
NOW() AS updated_at
FROM users
WHERE parent_email IS NOT NULL AND parent_email != ''
SQL

assert_equal expected_sql, ContactRollupsRaw.extract_from_source_query('users', ['birthday', 'gender'], 'parent_email')
assert_equal expected_sql, ContactRollupsRaw.get_extraction_query('users', false, ['birthday', 'gender'], 'parent_email')
end

test 'create_json_object looks as expected when called with single column' do
Expand Down