Permalink
Browse files

change ratio to diff

  • Loading branch information...
Pete Skomoroch
Pete Skomoroch committed Jun 23, 2009
1 parent 671e9d3 commit 2448230688dd636be016fc423b1c99c9db8b8dd7
Showing with 4 additions and 4 deletions.
  1. +2 −2 lib/python_streaming/hive_trend_mapper.py
  2. +1 −1 lib/scripts/daily_load.sh
  3. +1 −1 lib/scripts/run_daily_merge.sh
@@ -32,9 +32,9 @@ def calc_daily_trend(dates, pageviews, total_pageviews):
# ~Yesterday's pageviews...
y1 = pageviews[-8]
# ~Significance factor based on previous week's pageviews
- weekly_pageviews = sum(pageviews[-8:-2])
+ weekly_pageviews = sum(pageviews[-8:-1])
# Simple baseline trend algorithm
- slope = y2 / (1.0*y1)
+ slope = y2 - y1
trend = slope * (1.0 + log(1.0 +int(weekly_pageviews)))
error = 1.0/sqrt(int(total_pageviews))
return trend, error
@@ -68,7 +68,7 @@ WHERE new_pages.id=new_featured_pages.page_id;"
time mysql -u root trendingtopics_production -e "CALL dropindex('new_pages', 'pages_feature_trend_index');"
time mysql -u root trendingtopics_production -e "create index pages_feature_trend_index on new_pages (featured, monthly_trend);"
time mysql -u root trendingtopics_production -e "CALL dropindex('new_pages', 'pages_feature_id_index');"
-time mysql -u root trendingtopics_production -e "create index pages_feature_id_index on new_pages (featured, id);"
+#time mysql -u root trendingtopics_production -e "create index pages_feature_id_index on new_pages (featured, id);"
echo archiving the data to S3
# back up the trendsdb data, this copy will be pulled by the next daily job
@@ -68,7 +68,7 @@ if [ $HOURLYCOUNT -eq 24 ]; then
s3cmd --force --config=/root/.s3cfg get s3://trendingtopics/wikidump/page_lookup_nonredirects.txt /mnt/page_lookup_nonredirects.txt
# fetch the old page, timelines, & trends tables:
- s3cmd --force --config=/root/.s3cfg get s3://$MYBUCKET/archive/trendsdb.tar.gz /mnt/trendsdb.tar.gz
+ s3cmd --force --config=/root/.s3cfg get s3://$MYBUCKET/archive/$LASTDATE/trendsdb.tar.gz /mnt/trendsdb.tar.gz
# Quick hack to verify size of s3 download
S3_DB_SIZE=`s3cmd ls s3://trendingtopics/archive/trendsdb.tar.gz | tail -1 | awk '{print $3}'`

0 comments on commit 2448230

Please sign in to comment.