Skip to content

Commit

Permalink
Made several improvements to JotForm sync to reduce API load and impr…
Browse files Browse the repository at this point in the history
…ove resilience
  • Loading branch information
Andrew Oberhardt authored and Andrew Oberhardt committed Jun 7, 2018
1 parent c9ba776 commit fccf593
Show file tree
Hide file tree
Showing 9 changed files with 77 additions and 46 deletions.
2 changes: 1 addition & 1 deletion cookbooks/cdo-apps/templates/default/crontab.erb
Expand Up @@ -85,7 +85,7 @@
cronjob at:'30 14 * * *', do:dashboard_dir('bin','scheduled_pd_workshop_emails')
cronjob at:'* 14 * * 1', do:dashboard_dir('bin','scheduled_pd_application_emails')
cronjob at:'10 */12 * * *', do:dashboard_dir('bin','refresh_pd_workshop_material_orders')
cronjob at:'*/1 * * * *', do:dashboard_dir('bin', 'sync_jotforms')
cronjob at:'*/5 * * * *', do:dashboard_dir('bin', 'sync_jotforms')
cronjob at:'*/1 * * * *', do:deploy_dir('bin', 'cron', 'index_users_in_solr')
cronjob at:'25 7 * * *', do:deploy_dir('bin', 'cron', 'update_hoc_map')
cronjob at:'19 4 * * *', do:deploy_dir('bin', 'cron', 'update_census_map')
Expand Down
79 changes: 57 additions & 22 deletions dashboard/app/models/concerns/pd/jot_form_backed_form.rb
Expand Up @@ -5,6 +5,8 @@
# - form_id: JotForm form id
# - submission_id: JotForm submission id
# - answers: JotForm submission answer data
# Required JotForm question names:
# - environment: Set to Rails.env, so we can filter out results from other environments
module Pd
module JotFormBackedForm
extend ActiveSupport::Concern
Expand All @@ -14,6 +16,8 @@ module JotFormBackedForm
validates_presence_of :form_id, :submission_id
end

CACHE_TTL = 5.minutes.freeze

def placeholder?
answers.nil?
end
Expand Down Expand Up @@ -62,45 +66,77 @@ def sync_all_from_jotform
all_form_ids.compact.map {|form_id| sync_from_jotform(form_id)}.sum
end

def sync_questions_from_jotform(form_id)
Pd::SurveyQuestion.sync_from_jotform form_id
def get_questions(form_id, force_sync: false)
cache_key = "Pd::SurveyQuestion.#{form_id}"
if force_sync
# Force sync from jotform (which has an implicit DB save) and write to Rails cache
Pd::SurveyQuestion.sync_from_jotform(form_id).tap do |questions|
Rails.cache.write(cache_key, questions, expires_in: CACHE_TTL)
end
else
# Attempt to fetch from cache, then db, then finally JotForm
Rails.cache.fetch(cache_key, expires_in: CACHE_TTL) do
Pd::SurveyQuestion.find_by(form_id: form_id) || Pd::SurveyQuestion.sync_from_jotform(form_id)
end
end
end

# Download new responses from JotForm
def sync_from_jotform(form_id = nil)
return sync_all_from_jotform unless form_id

sync_questions_from_jotform(form_id)
get_questions(form_id)

JotForm::Translation.new(form_id).get_submissions(
last_known_submission_id: get_last_known_submission_id(form_id),
min_date: get_min_date(form_id)
min_date: get_min_date(form_id),
full_text_search: Rails.env
).map do |submission|
# TODO(Andrew): don't stop the whole set when one fails

answers = submission[:answers]

# When we pass the last_known_submission_id filter, there should be no duplicates,
# But just in case handle them gracefully as an upsert.
find_or_initialize_by(submission.slice(:form_id, :submission_id)).tap do |model|
model.answers = answers.to_json

# Note, form_data_hash processes the answers and will raise an error if they don't match the questions.
# Include hidden questions for full validation and so skip_submission? can inspect them.
next if skip_submission?(model.form_data_hash(show_hidden_questions: true))
model.save!
# Try first to parse the answers with existing question data. On first failure, force sync questions
# and retry. Second failure will propagate and fail the entire sync operation.
Retryable.retryable(sleep: 0, exception_cb: proc {model.force_sync_questions}) do
model.answers = answers.to_json

# Note, form_data_hash processes the answers and will raise an error if they don't match the questions.
# Include hidden questions for full validation and so skip_submission? can inspect them.
if skip_submission?(model.form_data_hash(show_hidden_questions: true))
CDO.log.info "Skipping #{submission[:submission_id]}"
next
end
model.save!
end
end
rescue => e
raise e, "Error processing submission #{submission[:submission_id]} for form #{form_id}: #{e.message}", e.backtrace
end.compact
end

# Override in included class to provide filtering rules
# Override in included class to provide custom filtering rules.
# By default skip other environments. This assumes that environment is a property in the processed answers.
# TODO(Andrew): Filter in the API query if possible, once we hear back from JotForm API support.
# See https://www.jotform.com/answers/1482175-API-Integration-Matrix-answer-returning-false-in-the-API#2
# See https://www.jotform.com/answers/1483561-API-Filter-form-id-submissions-endpoint-with-question-and-answer#4
# @param processed_answers [Hash]
# @return [Boolean] true if this submission should be skipped
def skip_submission?(processed_answers)
environment = processed_answers['environment']
raise "Missing required environment field" unless environment

# Skip other environments. Only keep this environment.
return true if environment != Rails.env

# Is it a duplicate? These will be prevented in the future, but for now log and skip
# TODO(Andrew): prevent duplicates and remove this code.
key_attributes = attribute_mapping.transform_values {|k| processed_answers[k]}
if exists?(key_attributes)
CDO.log.warn "Submission already exists for #{key_attributes}, skipping"
return true
end

false
end

Expand All @@ -119,12 +155,6 @@ def get_form_id(category, name)
raise KeyError, "Mising jotform form: #{category}.#{name}" unless forms.key? name
forms[name].to_i
end

def questions_for_form(form_id)
survey_question = SurveyQuestion.find_by(form_id: form_id)
raise KeyError, "No survey questions for form #{form_id}" unless survey_question
survey_question.form_questions
end
end

# Update answers for this submission from the JotForm API
Expand All @@ -148,9 +178,13 @@ def map_answers_to_attributes
end
end

# Get question for this form
def force_sync_questions
@questions = self.class.get_questions(form_id, force_sync: true)
end

# Get questions for this form
def questions
self.class.questions_for_form(form_id)
@questions ||= self.class.get_questions(form_id)
end

# Answers json parsed in hash form
Expand Down Expand Up @@ -192,6 +226,7 @@ def reload
end

def clear_memoized_values
@questions = nil
@form_data_hash = nil
@sanitized_form_data_hash = nil
end
Expand Down
9 changes: 0 additions & 9 deletions dashboard/app/models/pd/post_course_survey.rb
Expand Up @@ -53,15 +53,6 @@ def self.attribute_mapping
validates_inclusion_of :year, in: VALID_YEARS
validates_inclusion_of :course, in: VALID_COURSES

# Skip other environments. Only keep this environment.
# @override
def skip_submission?(processed_answers)
environment = processed_answers['environment']
raise "Missing required environment field" unless environment

environment != Rails.env
end

def self.form_id
get_form_id 'post_course', CURRENT_YEAR
end
Expand Down
8 changes: 0 additions & 8 deletions dashboard/app/models/pd/workshop_daily_survey.rb
Expand Up @@ -57,14 +57,6 @@ def set_day_from_form_id

validates_inclusion_of :day, in: VALID_DAYS

# Skip other environments. Only keep this environment.
def skip_submission?(processed_answers)
environment = processed_answers['environment']
raise "Missing required environment field" unless environment

environment != Rails.env
end

def self.get_form_id_for_day(day)
get_form_id 'local', "day_#{day}"
end
Expand Down
3 changes: 3 additions & 0 deletions dashboard/app/models/pd/workshop_facilitator_daily_survey.rb
Expand Up @@ -32,6 +32,9 @@ class WorkshopFacilitatorDailySurvey < ActiveRecord::Base
belongs_to :pd_workshop, class_name: 'Pd::Workshop'
belongs_to :facilitator, class_name: 'User', foreign_key: 'facilitator_id'

validates_uniqueness_of :user_id, scope: [:pd_workshop_id, :pd_session_id, :facilitator_id, :form_id],
message: 'already has a submission for this workshop, session, facilitator, and form'

# @override
def self.attribute_mapping
{
Expand Down
2 changes: 1 addition & 1 deletion dashboard/bin/sync_jotforms
Expand Up @@ -5,7 +5,7 @@ require_relative '../config/environment'
JOT_FORM_CLASSES = [
Pd::WorkshopDailySurvey,
Pd::WorkshopFacilitatorDailySurvey,
Pd::PostCourseSurvey
# Pd::PostCourseSurvey -- don't sync this one until we support the datetime control
].freeze

def main
Expand Down
6 changes: 5 additions & 1 deletion dashboard/lib/pd/jot_form/jot_form_rest_client.rb
Expand Up @@ -35,18 +35,22 @@ def get_questions(form_id)
# when specified, only new submissions after the known id will be returned.
# @param min_date [Date] (optional)
# when specified, only new submissions on or after the known date will be returned.
# @param full_text_search [String] (optional)
# Filter to ensure at least one answer matches the given text (% for wildcards)
# JotForm apparently doesn't support filtering answers to specific questions.
# Note - get_submissions has a default limit of 100.
# The API returns the limit (which will be 100), and the count.
# We can add functionality to override the limit if it becomes an issue.
# See https://api.jotform.com/docs/#form-id-submissions
def get_submissions(form_id, last_known_submission_id: nil, min_date: nil)
def get_submissions(form_id, last_known_submission_id: nil, min_date: nil, full_text_search: nil)
params = {
orderby: 'id asc'
}

filter = {}
filter['id:gt'] = last_known_submission_id.to_s if last_known_submission_id
filter['created_at:gt'] = min_date.to_s if min_date
filter['fullText'] = full_text_search if full_text_search
params[:filter] = filter.to_json unless filter.empty?

get "form/#{form_id}/submissions", params
Expand Down
12 changes: 9 additions & 3 deletions dashboard/lib/pd/jot_form/translation.rb
Expand Up @@ -34,15 +34,21 @@ def get_questions
# Retrieves new submissions for this form from JotForm's API
# @param last_known_submission_id [Integer] (optional) - filter to new submissions, since the last known id
# @param min_date [Date] (optional) (optional) filter to new submissions on or after the min date
# @param full_text_search [String] (optional)
# Filter to ensure at least one answer matches the given text (% for wildcards).
# @return [Array<Hash>] array of hashes with keys :form_id, :submission_id, :answers
# where answers is itself a hash of question ids to raw answers.
# Note - these answers are incomplete on their own, and need to be combined with the Question objects
# (from get_questions above)
def get_submissions(last_known_submission_id: nil, min_date: nil)
def get_submissions(last_known_submission_id: nil, min_date: nil, full_text_search: nil)
CDO.log.info "Getting JotForm submissions for #{@form_id} "\
"last_known_submission_id: #{last_known_submission_id}, min_date: #{min_date}"
"last_known_submission_id: #{last_known_submission_id}, min_date: #{min_date}, search: #{full_text_search}"

response = @client.get_submissions(@form_id, last_known_submission_id: last_known_submission_id, min_date: min_date)
response = @client.get_submissions(@form_id,
last_known_submission_id: last_known_submission_id,
min_date: min_date,
full_text_search: full_text_search
)
response['content'].map {|s| parse_jotform_submission(s)}
end

Expand Down
2 changes: 1 addition & 1 deletion dashboard/test/lib/pd/jot_form/translation_test.rb
Expand Up @@ -117,7 +117,7 @@ class TranslationTest < ActiveSupport::TestCase

JotFormRestClient.any_instance.
expects(:get_submissions).
with(@form_id, last_known_submission_id: last_known_submission_id, min_date: nil).
with(@form_id, last_known_submission_id: last_known_submission_id, min_date: nil, full_text_search: nil).
returns(get_submissions_result)

result = Translation.new(@form_id).get_submissions(last_known_submission_id: last_known_submission_id)
Expand Down

0 comments on commit fccf593

Please sign in to comment.