Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add parent email to new contact rollups process #34541

Merged
merged 9 commits into from May 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 2 additions & 2 deletions dashboard/app/models/contact_rollups_processed.rb
Expand Up @@ -38,7 +38,7 @@ def self.import_from_raw_table(batch_size = DEFAULT_BATCH_SIZE)
# Because GROUP_CONCAT returns a string, we add a parser function to convert the result to a hash.
group_by_query = <<-SQL.squish
SELECT email, CONCAT('[', GROUP_CONCAT(data_and_metadata), ']') AS all_data_and_metadata
FROM (#{select_query}) AS sub_query
FROM (#{select_query}) AS subquery
hacodeorg marked this conversation as resolved.
Show resolved Hide resolved
GROUP BY email
SQL

Expand All @@ -48,7 +48,7 @@ def self.import_from_raw_table(batch_size = DEFAULT_BATCH_SIZE)
contact_data = parse_contact_data(contact['all_data_and_metadata'])

processed_contact_data = {}
processed_contact_data.merge!(extract_field(contact_data, 'dashboard.email_preferences', 'opt_in') || {})
processed_contact_data.merge!(extract_field(contact_data, "#{CDO.dashboard_db_name}.email_preferences", 'opt_in') || {})
processed_contact_data.merge!(extract_updated_at(contact_data) || {})

batch << {email: contact['email'], data: processed_contact_data}
Expand Down
34 changes: 27 additions & 7 deletions dashboard/app/models/contact_rollups_raw.rb
Expand Up @@ -23,25 +23,45 @@ def self.truncate_table
end

def self.extract_email_preferences
query = extract_from_source_query('email_preferences', ['opt_in'], 'email')
query = get_extraction_query("#{CDO.dashboard_db_name}.email_preferences", 'email', ['opt_in'])
ActiveRecord::Base.connection.execute(query)
end

# @param source [String] Source from which we want to extract data
# @param data_columns [Array] Columns we want reshaped into a single JSON object
# Inserts parent emails found in the users table into the raw contact rollups table.
#
# The users table is first collapsed to one row per parent_email, keeping the
# most recent updated_at among the rows sharing that email. That subquery is
# then handed to get_extraction_query with an empty list of data columns, and
# the resulting statement is executed on the ActiveRecord connection.
#
# Rows are labelled with the source "<dashboard db name>.users.parent_email".
def self.extract_parent_emails
  sources_label = "#{CDO.dashboard_db_name}.users.parent_email"

  # One row per distinct parent_email with its latest update time.
  latest_update_per_parent = <<~SQL
    SELECT parent_email, MAX(updated_at) AS updated_at
    FROM users
    GROUP BY parent_email
  SQL

  # The source is a subquery (fourth argument), so an explicit source name is required.
  ActiveRecord::Base.connection.execute(
    get_extraction_query(latest_update_per_parent, 'parent_email', [], true, sources_label)
  )
end

# @param source [String] Source from which we want to extract data (can be a dashboard table name, or subquery)
# @param email_column [String] Column in source table we want to insert into the email column
# @param data_columns [Array] Columns we want reshaped into a single JSON object
# @param source_is_subquery [Boolean] (default false) Set to true if source is a subquery, rather than a table name
# @param source_name [String] (default nil) Name for source (non-nil if using a subquery)
# @return [String] A SQL statement to extract and reshape data from the source table.
def self.extract_from_source_query(source, data_columns, email_column)
def self.get_extraction_query(source, email_column, data_columns, source_is_subquery=false, source_name=nil)
if source_name.nil? && source_is_subquery
raise "Source name required if source is a subquery"
end

source_name ||= source
wrapped_source = source_is_subquery ? "(#{source}) AS subquery" : source

<<~SQL
INSERT INTO #{ContactRollupsRaw.table_name} (email, sources, data, data_updated_at, created_at, updated_at)
SELECT
#{email_column},
'dashboard.#{source}' AS sources,
'#{source_name}' AS sources,
#{create_json_object(data_columns)} AS data,
#{source}.updated_at AS data_updated_at,
updated_at AS data_updated_at,
NOW() AS created_at,
NOW() AS updated_at
FROM #{source}
FROM #{wrapped_source}
WHERE #{email_column} IS NOT NULL AND #{email_column} != ''
SQL
end
Expand Down
4 changes: 4 additions & 0 deletions dashboard/lib/contact_rollups_v2.rb
Expand Up @@ -9,6 +9,10 @@ def self.build_contact_rollups(log_collector, sync_with_pardot=false)
ContactRollupsRaw.extract_email_preferences
end

log_collector.time!('Extracts parent emails from dashboard users') do
ContactRollupsRaw.extract_parent_emails
end

log_collector.time!('Processes all extracted data') do
ContactRollupsProcessed.import_from_raw_table
end
Expand Down
6 changes: 3 additions & 3 deletions dashboard/test/models/contact_rollups_processed_test.rb
Expand Up @@ -39,7 +39,7 @@ class ContactRollupsProcessedTest < ActiveSupport::TestCase
create :contact_rollups_raw, email: email,
data: nil, data_updated_at: base_time - 1.day
create :contact_rollups_raw, email: email,
sources: 'dashboard.email_preferences', data: {opt_in: 1}, data_updated_at: base_time
sources: "#{CDO.dashboard_db_name}.email_preferences", data: {opt_in: 1}, data_updated_at: base_time

ContactRollupsProcessed.import_from_raw_table

Expand All @@ -61,13 +61,13 @@ class ContactRollupsProcessedTest < ActiveSupport::TestCase
end

test 'extract_field' do
table = 'dashboard.email_preferences'
table = "#{CDO.dashboard_db_name}.email_preferences"
field = 'opt_in'

test_cases = [
{input: [{}, nil, nil], expected_output: nil},
{input: [{table => {}}, table, field], expected_output: nil},
{input: [{'dashboard.another_table' => {opt_in: 1}}, table, field], expected_output: nil},
{input: [{"#{CDO.dashboard_db_name}.another_table" => {opt_in: 1}}, table, field], expected_output: nil},
{input: [{table => {'opt_in' => 0}}, table, field], expected_output: {opt_in: 0}},
{input: [{table => {'opt_in' => 1}}, table, field], expected_output: {opt_in: 1}},
{input: [{table => {'opt_in' => nil}}, table, field], expected_output: {opt_in: nil}}
Expand Down
63 changes: 49 additions & 14 deletions dashboard/test/models/contact_rollups_raw_test.rb
Expand Up @@ -9,7 +9,7 @@ class ContactRollupsRawTest < ActiveSupport::TestCase
expected_data = {opt_in: email_preference.opt_in ? 1 : 0}
result = ContactRollupsRaw.find_by(
email: email_preference.email,
sources: "dashboard.#{email_preference.class.table_name}"
sources: "#{CDO.dashboard_db_name}.email_preferences"
)

assert_equal expected_data, result.data.symbolize_keys
Expand All @@ -21,47 +21,82 @@ class ContactRollupsRawTest < ActiveSupport::TestCase
assert 3, ContactRollupsRaw.count
end

test 'extract_from_source_query can import when data column is null' do
test 'extract_parent_email creates records as we would expect' do
bencodeorg marked this conversation as resolved.
Show resolved Hide resolved
student = create :student, parent_email: 'caring@parent.com'
ContactRollupsRaw.extract_parent_emails

# confirms that a) record exists, and b) data is blank
refute_nil ContactRollupsRaw.find_by(
email: student.parent_email,
sources: "#{CDO.dashboard_db_name}.users.parent_email",
data: nil
)
end

test 'get_extraction_query can import when data column is null' do
teacher = create :teacher

query = ContactRollupsRaw.extract_from_source_query('users', [], 'email')
query = ContactRollupsRaw.get_extraction_query("#{CDO.dashboard_db_name}.users", 'email', [])
ActiveRecord::Base.connection.execute(query)

refute_nil ContactRollupsRaw.find_by(email: teacher.email, data: nil, sources: "#{CDO.dashboard_db_name}.users")
end

test 'get_extraction_query can import when source is a subquery' do
first_child = create :student, parent_email: 'caring@parent.com'
second_child = create :student, parent_email: 'caring@parent.com'

# we're not actually interested in user IDs in contact rollups
# just a simple example of something we could extract in a subquery
subquery = <<~SQL
SELECT parent_email, max(updated_at) as updated_at, max(id) as higher_student_id
bencodeorg marked this conversation as resolved.
Show resolved Hide resolved
FROM users
GROUP BY parent_email
SQL

query = ContactRollupsRaw.get_extraction_query(subquery, 'parent_email', ['higher_student_id'], true, "#{CDO.dashboard_db_name}.users.id")
ActiveRecord::Base.connection.execute(query)

refute_nil ContactRollupsRaw.find_by(email: teacher.email, data: nil, sources: 'dashboard.users')
refute_empty ContactRollupsRaw.where(
"email = :email and data->'$.higher_student_id' = :higher_student_id and sources = :sources",
email: first_child.parent_email,
sources: "#{CDO.dashboard_db_name}.users.id",
higher_student_id: second_child.id
)
end

test 'extract_from_source_query looks as expected when called with a single column' do
test 'get_extraction_query looks as expected when called with a single column' do
expected_sql = <<~SQL
INSERT INTO #{ContactRollupsRaw.table_name} (email, sources, data, data_updated_at, created_at, updated_at)
SELECT
email,
'dashboard.email_preferences' AS sources,
'#{CDO.dashboard_db_name}.email_preferences' AS sources,
JSON_OBJECT('opt_in',opt_in) AS data,
email_preferences.updated_at AS data_updated_at,
updated_at AS data_updated_at,
NOW() AS created_at,
NOW() AS updated_at
FROM email_preferences
FROM #{CDO.dashboard_db_name}.email_preferences
WHERE email IS NOT NULL AND email != ''
SQL

assert_equal expected_sql, ContactRollupsRaw.extract_from_source_query('email_preferences', ['opt_in'], 'email')
assert_equal expected_sql, ContactRollupsRaw.get_extraction_query("#{CDO.dashboard_db_name}.email_preferences", 'email', ['opt_in'])
end

test 'extract_from_source_query looks as expected when called with multiple columns' do
test 'get_extraction_query looks as expected when called with multiple columns' do
expected_sql = <<~SQL
INSERT INTO #{ContactRollupsRaw.table_name} (email, sources, data, data_updated_at, created_at, updated_at)
SELECT
parent_email,
'dashboard.users' AS sources,
'#{CDO.dashboard_db_name}.users' AS sources,
JSON_OBJECT('birthday',birthday,'gender',gender) AS data,
users.updated_at AS data_updated_at,
updated_at AS data_updated_at,
NOW() AS created_at,
NOW() AS updated_at
FROM users
FROM #{CDO.dashboard_db_name}.users
WHERE parent_email IS NOT NULL AND parent_email != ''
SQL

assert_equal expected_sql, ContactRollupsRaw.extract_from_source_query('users', ['birthday', 'gender'], 'parent_email')
assert_equal expected_sql, ContactRollupsRaw.get_extraction_query("#{CDO.dashboard_db_name}.users", 'parent_email', ['birthday', 'gender'])
end

test 'create_json_object looks as expected when called with single column' do
Expand Down