Permalink
Browse files

Pushing incomplete new backfill code to share it with Selena.

  • Loading branch information...
1 parent 6e4ef72 commit 3388e0c51830fe9fcba032c032c41f74d215278d @jberkus committed Sep 25, 2012
@@ -0,0 +1,16 @@
+.. This Source Code Form is subject to the terms of the Mozilla Public
+.. License, v. 2.0. If a copy of the MPL was not distributed with this
+.. file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#.# Database Updates
+====================
+
+This batch makes the following database changes:
+
+bug #
+ Fix something something something
+
+...
+
+The above changes should take only a few minutes to deploy.
+This upgrade does not require a downtime.
@@ -0,0 +1,22 @@
+\set ON_ERROR_STOP 1
+
+-- Create the control table that lists each matview together with its
+-- update and backfill functions, timing class and fill order.
+-- NOTE(review): the third argument ('postgres') is presumably the table
+-- owner -- confirm against the definition of create_table_if_not_exists().
+-- NOTE(review): "dependancies" is misspelled, but it is kept as-is so the
+-- column name does not change.
+SELECT create_table_if_not_exists ('matview_control',
+$x$
+CREATE TABLE matview_control (
+    matview citext not null primary key,
+    update_function citext not null,
+    backfill_function citext not null,
+    dependancies citext,
+    timing citext not null CHECK (
+        timing IN ('hourly','daily','cumulative','lastday') ),
+    enabled boolean not null default true,
+    fill_order int not null default 99,
+    adu_related boolean not null default false,
+    notes text
+);
+$x$,'postgres');
+
+\set ON_ERROR_STOP 0
+-- the load below may error out if we're doing it for the second time,
+-- so errors are ignored from here on
+
+\copy matview_control FROM 'matview_update_grid.csv' WITH CSV HEADER
@@ -0,0 +1,166 @@
+#!/usr/bin/python
+
+import datetime
+import os
+from datetime import datetime
+from datetime import date
+from datetime import timedelta
+from optparse import OptionParser
+
+import psycopg2
+import psycopg2.extensions
+import psycopg2.extras
+
+parser = OptionParser()
+parser.add_option("-D", "--database", dest="dbname",
+ help="database to upgrade", metavar="DBNAME",
+ default="breakpad")
+parser.add_option("-p", "--port", dest="dbport",
+ help="database port", metavar="DBPORT",
+ default="5432")
+parser.add_option("-h", "--host", dest="dbhost",
+ help="database hostname", metavar="DBHOST",
+ default="localhost")
+parser.add_option("-l", "--log", dest="logfile",
+ help="logfile location for output",
+ metavar="LOGFILE", default="backfill.log")
+parser.add_option("-s", "--start", dest="start",
+ help="UTC date to backfill from (required)", metavar="YYYY-MM-DD:HR")
+parser.add_option("-e", "--end", dest="end",
+ help="UTC date to backfill to (inclusive)", metavar="YYYY-MM-DD:HR"
+parser.add_option("-N", "--no_hourly", dest="no_hourly",
+ action="store_true", help="exclude hourly backfill")
+parser.add_option("-A", "--adu", dest="adu",
+ action="store_true", help="backfill only matviews which use adu")
+parser.add_option("-m", "--matviews", dest="matviews",
+ help="list of mativews to backfill", metavar="MV1,MV2,MV3")
+parser.add_option("-c", "--classes", dest="classes",
+ help="list of matview classes to backfill", metavar="C1,C2")
+(options, args) = parser.parse_args()
+if not options.start:
+ parser.error('No start date supplied')
+
+# this script must run in UTC
+
+os.environ['TZ'] = 'UTC'
+
+# check, convert dates
+
+startdate = datetime.strptime(options.start, "%Y-%m-%d:%H")
+if options.enddate:
+ enddate = datetime.strptime(options.end, "%Y-%m-%d:%H")
+else:
+ # if no enddate supplied, backfill to 3 hours ago
+ enddate = datetime.today() - timedelta(hours=3)
+
+# convert matview list, if supplied
+
+if options.matviews:
+ allmatviews = False
+ runmatviews = options.matviews.split(',')
+else:
+ allmativews = True
+
+# class list, if supplied
+
+if options.classes:
+ runclasses = options.classes.split(',')
+else:
+ runclasses = [ 'cumulative','hourly','daily','lastday' ]
+
+# connect to database
+
+conn = psycopg2.connect("dbname=%s user=postgres"
+ % ( options.dbname, ) )
+cur = conn.cursor()
+
+# run all jobs marked "cumulative"
+
+def getjoblist ( jobclass, jobdate ):
+
+ if jobclass in runclasses:
+ getsql = """SELECT DISTINCT backfill_function
+ FROM matview_control
+ WHERE timing = '%s'
+ AND enabled """ % jobclass
+ if not allmatviews then:
+ getsql += " AND matview IN ( %s ) " % options.matviews
+ if options.adu then:
+ getsql += " AND adu_related "
+ getsql += " ORDER BY fill_order;"
+ else
+ return false
+
+
+
+run_jobs( cur, getsql )
+
+# for each day in backfill period:
+
+# a. run hourly jobs
+
+# b. run daily jobs
+
+# update successful day
+
+# run "last day only" jobs for last successful day
+
+# exit
+
+startts = datetime.date.today() - datetime.timedelta(weeks=options.weeks)
+enddate = datetime.date.today() - datetime.timedelta(days=1)
+
+def funcdateloop( mycur, starts, ends, fillfunc ):
+ curdate = starts
+ while (curdate <= ends):
+ qrytext = """SELECT %s('%s',false)""" % ( fillfunc, curdate.strftime("%Y-%m-%d"), )
+ mycur.execute(qrytxt)
+ curdate = curdate + datetime.timedelta(days=1)
+
+conn = psycopg2.connect("dbname=%s user=postgres"
+ % ( options.dbname, ) )
+cur = conn.cursor()
+
+# populate build_adu back X weeks
+
+print 'backfilling %d weeks of build_adu'
+funcdateloop(cur, startdate, enddate, 'update_build_adu');
+
+# copy over daily_crashes back to the beginning of product_adu
+# up to the beginning of the backfill era
+# to crash_by_user
+
+print 'copying data from daily_crashes to crashes_by_user'
+
+cur.execute("""INSERT INTO crashes_by_user (
+ ( product_version_id, os_short_name, crash_type_id,
+ report_date, report_count, adu )
+ SELECT productdims_id, os_short_name, crash_type_id,
+ adu_day, count, adu_count
+ FROM daily_crashes JOIN product_versions
+ ON productdims_id = product_versions.product_version_id
+ JOIN crash_types ON report_type = old_code
+ JOIN product_adu ON productdims_id = product_adu.product_version_id
+ WHERE adu_day < %s """, ( startdate, ) )
+
+# now backfill the rest
+print 'backfilling %d weeks of crash_by_user'
+funcdateloop(cur, startdate, enddate, 'update_crash_by_user');
+
+# populate crash_by_user_build back X weeks
+
+print 'backfilling %d weeks of crash_by_user_build'
+funcdateloop(cur, startdate, enddate, 'update_crash_by_user_build');
+
+# populate home_page_graph back X weeks
+print 'backfilling %d weeks of home_page_graph'
+funcdateloop(cur, startdate, enddate, 'update_home_page_graph');
+
+# populate home_page_graph_build back X weeks
+print 'backfilling %d weeks of home_page_graph_build'
+funcdateloop(cur, startdate, enddate, 'update_home_page_graph_build');
+
+# populate tcbs_build back X weeks
+print 'backfilling %d weeks of tcbs_build'
+funcdateloop(cur, startdate, enddate, 'update_tcbs_build');
+
+print 'done backfilling'
+print 'you may now run QA automation'
@@ -0,0 +1,22 @@
+"Matview","Update Function","Backfill Function","Depends On","Group","Enabled","Order","adu_related","Notes"
+"reports_duplicates","update_reports_duplicates","backfill_reports_duplicates_hourly",,"hourly",TRUE,1,FALSE,
+"reports_clean","update_reports_clean","backfill_reports_clean_hourly","reports_duplicates, product_version","hourly",TRUE,2,FALSE,
+"product_version","update_product_versions","update_product_versions","ftpscraper","cumulative",TRUE,1,FALSE,
+"build_adu","update_build_adu","backfill_build_adu","raw_adu","daily",TRUE,2,TRUE,
+"product_adu","update_adu","backfill_adu","raw_adu","daily",TRUE,1,TRUE,
+"crashes_by_user","update_crashes_by_user","backfill_crashes_by_user","product_adu","daily",TRUE,5,TRUE,
+"crashes_by_user_build","update_crashes_by_user_build","backfill_crashes_by_user_build","build_adu","daily",TRUE,6,TRUE,
+"correlations","update_correlations","backfill_correlations","NA","lastday",TRUE,99,FALSE,
+"correlations_addons","update_correlations","backfill_correlations","NA","lastday",TRUE,99,FALSE,
+"correlations_cores","update_correlations","backfill_correlations","NA","lastday",TRUE,99,FALSE,
+"correlations_modules","update_correlations","backfill_correlations","NA","lastday",FALSE,99,FALSE,
+"daily_hangs","update_hang_report","backfill_hang_report",,"daily",TRUE,99,FALSE,
+"home_page_graph","update_home_page_graph","backfill_home_page_graph","product_adu","daily",TRUE,7,TRUE,
+"home_page_graph_build","update_home_page_graph_build","backfill_home_page_graph_build","build_adu","daily",TRUE,8,TRUE,
+"nightly_builds","update_nightly_builds","backfill_nightly_builds",,"daily",TRUE,99,FALSE,
+"signature_products","update_signatures","backfill_signature_counts",,"daily",TRUE,9,FALSE,
+"signature_products_rollup","update_signatures","backfill_signature_counts",,"daily",TRUE,10,FALSE,
+"tcbs","update_tcbs","backfill_tcbs",,"daily",TRUE,3,FALSE,
+"tcbs_build","update_tcbs_build","backfill_tcbs_build",,"daily",TRUE,4,FALSE,
+"explosiveness","update_explosiveness","backfill_explosiveness","tcbs, build_adu","lastday",TRUE,99,TRUE,
+"rank_compare","update_rank_compare","backfill_rank_compare","tcbs","lastday",TRUE,99,FALSE,
@@ -0,0 +1,35 @@
+#!/bin/bash
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#please see README
+
+# stop at the first command that fails
+set -e
+
+# directory holding this script and the SQL files it applies
+CURDIR=$(dirname "$0")
+# database to upgrade: first argument, defaulting to "breakpad"
+DBNAME=${1:-breakpad}
+# placeholder -- replace with the real release number
+VERSION='#.#'
+
+#echo '*********************************************************'
+#echo 'support functions'
+#psql -f ${CURDIR}/support_functions.sql $DBNAME
+
+echo '*********************************************************'
+echo 'fix '
+echo 'no bug'
+psql -f "${CURDIR}/_____.sql" "$DBNAME"
+
+echo '*********************************************************'
+echo 'fix '
+echo 'bug ######'
+psql -f "${CURDIR}/_____.sql" "$DBNAME"
+psql -f "${CURDIR}/_____.sql" "$DBNAME"
+
+#change version in DB
+psql -c "SELECT update_socorro_db_version( '$VERSION' )" "$DBNAME"
+
+echo "$VERSION upgrade done"
+
+exit 0

0 comments on commit 3388e0c

Please sign in to comment.