A bunch of new definitions to fill in holes in prognoz, and some techniques to correct pseudohistory and the prognoz scaling issue.
1 parent 8545ff3 commit 949c9b32d7284707ea0e80985324b5d07621ef2a @bentut committed Oct 6, 2012
@@ -90,6 +90,9 @@ def source_type
end
end
+ # Instead of this "is_pseudo_history?" check, we may want to run something like the line below periodically;
+ # might need to define a pseudo-history task somewhere:
+ # DataSource.where("eval LIKE '%bls_histextend_date_format_correct.xls%'").each {|ds| ds.mark_as_pseudo_history}
def is_pseudo_history?
pseudo_history_sources = [
"/Volumes/UHEROwork/data/rawdata/History/inc_hist.xls",
@@ -100,19 +103,20 @@ def is_pseudo_history?
pseudo_history_sources.each { |phs| return true if source_eval.index(phs) }
return false
end
-
- def DataPoint.set_pseudo_history
- DataPoint.all.each do |dp|
- begin
- ph = dp.is_pseudo_history?
- dp.update_attributes(:pseudo_history => true) if ph and !dp.pseudo_history
- dp.update_attributes(:pseudo_history => false) if !ph and dp.pseudo_history
- rescue
- puts "error for dp #{dp.id}"
- end
- end
- 0
- end
+
+ # This never finishes running, and it doesn't seem to catch everything I want either.
+ # def DataPoint.set_pseudo_history
+ # DataPoint.all.each do |dp|
+ # begin
+ # ph = dp.is_pseudo_history?
+ # dp.update_attributes(:pseudo_history => true) if ph and !dp.pseudo_history
+ # dp.update_attributes(:pseudo_history => false) if !ph and dp.pseudo_history
+ # rescue
+ # puts "error for dp #{dp.id}"
+ # end
+ # end
+ # 0
+ # end
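A periodic pseudo-history task along the lines of the comment above could replace the per-datapoint loop by marking whole data sources instead. A minimal sketch (not part of this commit), assuming a rake task that uses the mark_as_pseudo_history method defined further down; the task name and the pattern list are illustrative:

    # lib/tasks/pseudo_history.rake (hypothetical file)
    namespace :pseudo_history do
      desc "Mark data sources loaded from history-extension spreadsheets as pseudo-history"
      task :mark => :environment do
        # illustrative pattern list; only this file is named in the commit
        ["%bls_histextend_date_format_correct.xls%"].each do |pattern|
          DataSource.where("eval LIKE ?", pattern).each { |ds| ds.mark_as_pseudo_history }
        end
      end
    end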
def source_type_code
case source_type
@@ -160,7 +160,29 @@ def reload_source
# dp.update_attributes(:history => Time.now) if (dp.history.nil? and dates.index(dp.date_string).nil?)
# end
# end
+
+ # DataSource.where("eval LIKE '%bls_histextend_date_format_correct.xls%'").each {|ds| ds.mark_as_pseudo_history}
+
+ def mark_as_pseudo_history
+ puts "marking ds: #{self.id}"
+ data_points.each {|dp| dp.update_attributes(:pseudo_history => true) }
+ end
+
+ def mark_as_pseudo_history_before(date_string)
+ puts "marking ds: #{self.id}"
+ data_points.where("date_string < ?", date_string).each {|dp| dp.update_attributes(:pseudo_history => true) }
+ end
+ def unmark_as_pseudo_history
+ puts "unmarking ds: #{self.id}"
+ data_points.each {|dp| dp.update_attributes(:pseudo_history => false) }
+ end
+
+ def unmark_as_pseudo_history_before(date_string)
+ puts "unmarking ds: #{self.id}"
+ data_points.where("date_string < ?", date_string).each {|dp| dp.update_attributes(:pseudo_history => false) }
+ end
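Since these helpers issue one UPDATE per data point, they may be slow on large sources. A faster variant (a sketch only, assuming pseudo_history is a plain boolean column and that skipping ActiveRecord callbacks is acceptable) could use update_all:

    def mark_as_pseudo_history
      puts "marking ds: #{self.id}"
      # one UPDATE statement for the whole source instead of one query per data point
      data_points.update_all(:pseudo_history => true)
    end

    def mark_as_pseudo_history_before(date_string)
      puts "marking ds: #{self.id}"
      data_points.where("date_string < ?", date_string).update_all(:pseudo_history => true)
    end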
+
def delete_all_other_sources
s = self.series
s.data_sources_by_last_run.each {|ds| ds.delete unless ds.id == self.id}
@@ -352,6 +352,7 @@ def data_from_datapoints
def scaled_data_no_pseudo_history(round_to = 3)
data_hash = {}
self.units ||= 1
+ self.units = 1000 if name[0..2] == "TGB" # hack for the tax scaling; this override should not be saved back to units
data_points.each do |dp|
data_hash[dp.date_string] = (dp.value / self.units).round(round_to) if dp.current and !dp.pseudo_history
end
@@ -483,6 +484,12 @@ def load_from_fred(code)
new_transformation("loaded series : #{code} from FRED website", series_data)
end
+ def days_in_period
+ series_data = {}
+ data.each {|date_string, val| series_data[date_string] = date_string.to_date.days_in_period(self.frequency) }
+ Series.new_transformation("days in time periods", series_data, Series.frequency_from_code(frequency))
+ end
+
def Series.load_from_fred(code, frequency)
series_data = DataHtmlParser.new.get_fred_series(code)
Series.new_transformation("loaded series : #{code} from FRED website", series_data, Series.frequency_from_code(frequency))
@@ -21,8 +21,7 @@
<a href="/prognoz_data_files">Prognoz Output Files</a> |
<a href="/series">Data Series</a> |
<a href="/data_source_downloads">Downloads</a> |
- <a href="/mapping">Coverage</a> |
- <a href="/investigate">Accuracy</a> |
+ <a href="/investigate_visual">Investigation Dashboard</a> |
<a href="/data_lists">Exports</a> |
Search
@@ -38,7 +38,7 @@
<span class='current-datapoint'>
<% bgcolor = DataSource.find(cdp.data_source_id).color rescue "FFF" %>
<div class='datapoint current-datapoint' style='background-color:#<%= bgcolor %>'>
- <%= "%.3f" % cdp_val %><sup><%= (Time.now.to_date - cdp.created_at.to_date).to_i %></sup>
+ <%= "%.3f" % cdp_val %><sup><%= (Time.now.to_date - cdp.created_at.to_date).to_i %><%="(ph)" if cdp.pseudo_history %></sup>
</div>
<%
#series.data_points.where(:current => false,
@@ -54,7 +54,7 @@
<% dp_val = dp.value / series.units %>
<% bgcolor = DataSource.find(dp.data_source_id).color rescue "FFF" %>
<div class='datapoint' style='background-color:#<%= bgcolor %>'>
- <%= "%.3f" % dp_val %><sup><%= (Time.now.to_date - dp.created_at.to_date).to_i %></sup>
+ <%= "%.3f" % dp_val %><sup><%= (Time.now.to_date - dp.created_at.to_date).to_i %><%="(ph)" if dp.pseudo_history %></sup>
</div>
<% end %>
</span>
@@ -55,6 +55,12 @@ def semi_s
return "#{self.year}-07-01" if [7,8,9,10,11,12].include?(self.mon)
end
+ def days_in_period(frequency)
+ return (self.leap? ? 366 : 365) if frequency == "year"
+ return self.days_in_month + (self >> 1).days_in_month + (self >> 2).days_in_month if frequency == "quarter"
+ return self.days_in_month if frequency == "month"
+ end
+
def days_in_month
Time.days_in_month(self.month, self.year)
end
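A quick illustration of the new Date#days_in_period above (dates chosen for illustration); note that the quarter branch assumes the receiver is the first month of its quarter, and an unrecognized frequency string falls through and returns nil:

    Date.parse("2012-01-01").days_in_period("month")    # => 31
    Date.parse("2012-01-01").days_in_period("quarter")  # => 91  (Jan + Feb + Mar in a leap year)
    Date.parse("2012-01-01").days_in_period("year")     # => 366
    Date.parse("2012-01-01").days_in_period("week")     # => nil (no matching branch)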
@@ -170,6 +170,7 @@ def aremos_comparison_display_array
# end
def data_diff(comparison_data, digits_to_round)
+ self.units = 1000 if name[0..2] == "TGB" # hack for the tax scaling; this override should not be saved back to units
cdp = current_data_points
diff_hash = {}
results = []
@@ -115,9 +115,9 @@ def mc_ma_county_share_for(county_abbrev, series_prefix = self.name.split("@")[0
start_date = "#{series_prefix}NS@#{county_abbrev}.#{f}".ts.first_value_date
end_date = "#{series_prefix}NS@#{county_abbrev}.#{f}".ts.get_last_complete_december_datestring
historical = "#{series_prefix}NS@#{county_abbrev}.#{f}".ts.moving_average_offset_early(start_date,end_date) / "#{series_prefix}NS@HI.#{f}".ts.moving_average_offset_early(start_date,end_date) * self
- historical.print
+ #historical.print
mean_corrected_historical = historical / historical.annual_sum * "#{series_prefix}NS@#{county_abbrev}.#{f}".ts.annual_sum
- mean_corrected_historical.print
+ #mean_corrected_historical.print
current_year = "#{series_prefix}NS@#{county_abbrev}.#{f}".ts.backward_looking_moving_average.get_last_incomplete_year / "#{series_prefix}NS@HI.#{f}".ts.backward_looking_moving_average.get_last_incomplete_year * self
new_transformation("Share of #{name} using ratio of #{series_prefix}NS@#{county_abbrev}.#{f} over #{series_prefix}NS@HI.#{f} using a mean corrected moving average (offset early) and a backward looking moving average for the current year",
mean_corrected_historical.data.series_merge(current_year.data))
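For reference, the mean-corrected moving-average share used throughout the identities below amounts to (a rough restatement of the code above, not an exact one):

    \[
      \text{hist}_t = \frac{\mathrm{MA}(\text{county NS})_t}{\mathrm{MA}(\text{state NS})_t}\, x_t,
      \qquad
      \text{mc\_hist}_t = \text{hist}_t \cdot \frac{\text{annual\_sum}(\text{county NS})}{\text{annual\_sum}(\text{hist})},
    \]

where x is the state-level series being shared out; the last incomplete year is then filled in from backward-looking moving averages of the same county/state ratio.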
@@ -131,8 +131,10 @@ task :const_identities => :environment do
t= Time.now
Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/census_imp.xls"
#not sure if these should go in misc or what...
- Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/AltUnemplStats.xls"
- Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/AltUnemplStats.xls", "Q"
+ #Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/AltUnemplStats.xls"
+ #Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/AltUnemplStats.xls", "Q"
+ Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/AltURA.xls"
+ Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/AltURQ.xls"
Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/History/prud_upd.xls"
Series.load_all_series_from "/Volumes/UHEROwork/data/rawdata/Manual/hbr_upd_m.csv"
@@ -381,9 +381,14 @@ task :tax_identities => :environment do
#9/28/12
#would be nice if this load could be made more efficient. Not sure why the grouped call isn't working well right now...
+
+ ["HI", "HON", "MAU", "KAU", "HAW"].each do |cnty|
+ "TRINNS@#{cnty}.M".ts_eval= %Q|"TRINESNS@#{cnty}.M".ts + "TRINPRNS@#{cnty}.M".ts + "TRINWHNS@#{cnty}.M".ts + "TRINRFNS@#{cnty}.M".ts|
+ "TRCONS@#{cnty}.M".ts_eval= %Q|"TRCOESNS@#{cnty}.M".ts + "TRCOPRNS@#{cnty}.M".ts + "TRCORFNS@#{cnty}.M".ts|
- "TRINNS@HI.M".ts_eval= %Q|"TRINESNS@HI.M".ts + "TRINPRNS@HI.M".ts + "TRINWHNS@HI.M".ts + "TRINRFNS@HI.M".ts|
- "TRCONS@HI.M".ts_eval= %Q|"TRCOESNS@HI.M".ts + "TRCOPRNS@HI.M".ts + "TRCORFNS@HI.M".ts|
+ "TRCONS@#{cnty}.Q".ts_eval= %Q|"TRCONS@#{cnty}.M".ts.aggregate_by(:quarter, :sum)|
+ "TRINNS@#{cnty}.Q".ts_eval= %Q|"TRINNS@#{cnty}.M".ts.aggregate_by(:quarter, :sum)|
+ end
"TR@HI.M".ts_eval=%Q|"TR@HI.M".tsn.load_sa_from("/Volumes/UHEROwork/data/tax/seasadj/sadata.xls", "sadata").trim|
"TR@HI.M".ts_eval=%Q|"TR@HI.M".tsn.load_mean_corrected_sa_from "/Volumes/UHEROwork/data/tax/seasadj/sadata.xls", "sadata"|
"TRFU@HI.M".ts_eval=%Q|"TRFU@HI.M".tsn.load_sa_from("/Volumes/UHEROwork/data/tax/seasadj/sadata.xls", "sadata").trim|
@@ -1098,9 +1098,34 @@ task :visitor_identities=>:environment do
end
#from task vlos requires vdayNSs and visNSs and vrlsNSs
- ["CAN", "JP", "USE", "USW", "DM", "IT"].each do |serlist|
+ ["","CAN", "JP", "USE", "USW", "DM", "IT"].each do |serlist|
["HI", "HON", "HAW", "KAU", "MAU", "MAUI", "MOL", "LAN"].each do |cnty|
- "VLOS#{serlist}NS@#{cnty}.M".ts_eval= %Q|"VDAY#{serlist}NS@#{cnty}.M".ts / "VIS#{serlist}NS@#{cnty}.M".ts|
+ "VLOS#{serlist}NS@#{cnty}.M".ts_eval= %Q|"VDAY#{serlist}NS@#{cnty}.M".ts / "VIS#{serlist}NS@#{cnty}.M".ts|
+ "VLOS#{serlist}NS@#{cnty}.Q".ts_eval= %Q|"VDAY#{serlist}NS@#{cnty}.Q".ts / "VIS#{serlist}NS@#{cnty}.Q".ts|
+ end
+ end
+
+ ["HI", "HON", "HAW", "KAU", "MAU"].each do |cnty| #no .MAUI .MOL or .LAN ... missing components
+ ["M", "Q", "A"].each do |f|
+ "VLOS@#{cnty}.#{f}".ts_eval= %Q|"VDAY@#{cnty}.#{f}".ts / "VIS@#{cnty}.#{f}".ts|
+ "VLOSJP@#{cnty}.#{f}".ts_eval= %Q|"VDAYJP@#{cnty}.#{f}".ts / "VISJP@#{cnty}.#{f}".ts|
+ #"VLOSCAN@#{cnty}.#{f}".ts_eval= %Q|"VDAYCAN@#{cnty}.#{f}".ts / "VISCAN@#{cnty}.#{f}".ts| #missing components
+ #"VLOSUSE@#{cnty}.#{f}".ts_eval= %Q|"VDAYUSE@#{cnty}.#{f}".ts / "VISUSE@#{cnty}.#{f}".ts| #missing components
+ #"VLOSUSW@#{cnty}.#{f}".ts_eval= %Q|"VDAYUSW@#{cnty}.#{f}".ts / "VISUSW@#{cnty}.#{f}".ts| #missing components
+ "VLOSDM@#{cnty}.#{f}".ts_eval= %Q|"VDAYDM@#{cnty}.#{f}".ts / "VISDM@#{cnty}.#{f}".ts|
+ "VLOSIT@#{cnty}.#{f}".ts_eval= %Q|"VDAYIT@#{cnty}.#{f}".ts / "VISIT@#{cnty}.#{f}".ts|
+ end
+ end
+
+ ["","CAN", "JP", "USE", "USW", "DM", "IT"].each do |serlist|
+ ["HI", "HON", "HAW", "KAU", "MAU", "MAUI", "MOL", "LAN"].each do |cnty|
+ ["M", "Q"].each do |f|
+ begin
+ "VADC#{serlist}NS@#{cnty}.#{f}".ts_eval= %Q|"VDAY#{serlist}NS@#{cnty}.#{f}".ts / "VDAY#{serlist}NS@#{cnty}.#{f}".ts.days_in_period|
+ rescue
+ puts "ERROR: #{serlist}NS, #{cnty}, #{f}"
+ end
+ end
end
end
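The VADC identities above compute an average daily census: visitor days in a period divided by the number of days in that period, via the new days_in_period transformation. One iteration of the loop expands to, for example:

    # a January value is monthly visitor days divided by 31
    "VADCJPNS@HI.M".ts_eval= %Q|"VDAYJPNS@HI.M".ts / "VDAYJPNS@HI.M".ts.days_in_period|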
@@ -1185,16 +1210,10 @@ task :visitor_identities=>:environment do
"VIS@HAW.M".ts_eval= %Q|"VISDEMETRA_MC@HI.M".ts.mc_ma_county_share_for("HAW","VIS").trim|
"VIS@KAU.M".ts_eval= %Q|"VISDEMETRA_MC@HI.M".ts.mc_ma_county_share_for("KAU","VIS").trim|
"VIS@MAU.M".ts_eval= %Q|"VISDEMETRA_MC@HI.M".ts.mc_ma_county_share_for("MAU","VIS").trim|
-
- ["HI","HON", "HAW", "MAU", "KAU"].each do |county|
- "VIS@#{county}.M".ts_append_eval %Q|"VISJP@#{county}.M".ts + "VISUS@#{county}.M".ts + "VISRES@#{county}.M".ts|
- "VDAY@#{county}.M".ts_append_eval %Q|"VDAYJP@#{county}.M".ts + "VDAYUS@#{county}.M".ts + "VDAYRES@#{county}.M".ts|
- end
-
- ["HON", "HI", "KAU", "MAU", "HAW"].each do |county|
- "VIS@#{county}.A".ts_eval= %Q|"VIS@#{county}.M".ts.aggregate(:year, :sum)|
- end
-
+ "VIS@MAUI.M".ts_eval= %Q|"VISDEMETRA_MC@HI.M".ts.mc_ma_county_share_for("MAUI","VIS").trim|
+ "VIS@MOL.M".ts_eval= %Q|"VISDEMETRA_MC@HI.M".ts.mc_ma_county_share_for("MOL","VIS").trim|
+ "VIS@LAN.M".ts_eval= %Q|"VISDEMETRA_MC@HI.M".ts.mc_ma_county_share_for("LAN","VIS").trim|
+
"VLOS@HI.M".ts_eval= %Q|"VDAY@HI.M".ts / "VIS@HI.M".ts|
"VSO@HI.M".ts_eval= %Q|"VSO@HI.M".tsn.load_sa_from "/Volumes/UHEROwork/data/tour/seasadj/sadata.xls", "sadata"|
@@ -1214,6 +1233,53 @@ task :visitor_identities=>:environment do
"VEXPPDJP@HI.M".ts_eval= %Q|"VEXPPDJP@HI.M".tsn.load_mean_corrected_sa_from "/Volumes/UHEROwork/data/tour/seasadj/sadata.xls", "sadata"|
"VEXPPDJP@HI.M".ts_eval= %Q|"VEXPPDJP@HI.M".ts.apply_seasonal_adjustment :additive|
+
+ ["HON", "HAW", "KAU", "MAU"].each do |cnty| #MAUI / MOL / LAN?
+ ser = ""
+ "VSO#{ser}@#{cnty}.M".ts_eval= %Q|"VSO#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VSO#{ser}")|
+ "VSODM#{ser}@#{cnty}.M".ts_eval= %Q|"VSODM#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VSODM#{ser}")|
+ "VEXP#{ser}@#{cnty}.M".ts_eval= %Q|"VEXP#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VEXP#{ser}")|
+ "VEXPPD#{ser}@#{cnty}.M".ts_eval= %Q|"VEXPPD#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VEXPPD#{ser}")|
+ "VEXPPT#{ser}@#{cnty}.M".ts_eval= %Q|"VEXPPT#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VEXPPT#{ser}")|
+
+ end
+
+ ["CAN", "JP", "USE", "USW"].each do |ser|
+ ["HON", "HAW", "KAU", "MAU","MAUI","MOL","LAN"].each do |cnty|
+ "VIS#{ser}@#{cnty}.M".ts_eval= %Q|"VIS#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VIS#{ser}")|
+ "VDAY#{ser}@#{cnty}.M".ts_eval= %Q|"VDAY#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VDAY#{ser}")|
+ "VLOS#{ser}@#{cnty}.M".ts_eval= %Q|"VLOS#{ser}@HI.M".ts.mc_ma_county_share_for("#{cnty}","VLOS#{ser}")| #only works for MOL / Maui / LAN
+ "VIS#{ser}@#{cnty}.Q".ts_eval= %Q|"VIS#{ser}@#{cnty}.M".ts.aggregate(:quarter, :sum)|
+ "VIS#{ser}@#{cnty}.A".ts_eval= %Q|"VIS#{ser}@#{cnty}.M".ts.aggregate(:year, :sum)|
+ "VDAY#{ser}@#{cnty}.Q".ts_eval= %Q|"VDAY#{ser}@#{cnty}.M".ts.aggregate(:quarter, :sum)|
+ "VDAY#{ser}@#{cnty}.A".ts_eval= %Q|"VDAY#{ser}@#{cnty}.M".ts.aggregate(:year, :sum)|
+ end
+ end
+
+ ["HI","HON", "HAW", "MAU", "KAU"].each do |county| #think I need to add MAUI, MOL, LAN but need to figure out VIS / VDAYRES
+ "VIS@#{county}.M".ts_append_eval %Q|"VISJP@#{county}.M".ts + "VISUS@#{county}.M".ts + "VISRES@#{county}.M".ts|
+ "VDAY@#{county}.M".ts_append_eval %Q|"VDAYJP@#{county}.M".ts + "VDAYUS@#{county}.M".ts + "VDAYRES@#{county}.M".ts|
+ end
+
+ ["HON", "HI", "KAU", "MAU", "HAW", "MAUI", "MOL", "LAN"].each do |county|
+ "VIS@#{county}.A".ts_eval= %Q|"VIS@#{county}.M".ts.aggregate(:year, :sum)|
+ end
+
+
+ ["","CAN", "JP", "USE", "USW", "DM", "IT"].each do |serlist|
+ ["HI", "HON", "HAW", "KAU", "MAU", "MAUI", "MOL", "LAN"].each do |cnty|
+ ["M", "Q", "A"].each do |f|
+ begin
+ #next unless ["MAUI", "MOL", "LAN"].index(cnty).nil? # think these all eventually need to be in, though
+ "VADC#{serlist}@#{cnty}.#{f}".ts_eval= %Q|"VDAY#{serlist}@#{cnty}.#{f}".ts / "VDAY#{serlist}@#{cnty}.#{f}".ts.days_in_period|
+ rescue
+ puts "ERROR: #{serlist}, #{cnty}, #{f}"
+ end
+ end
+ end
+ end
+
+
#separate section for these...?
["HON", "HI", "KAU", "MAU", "HAW"].each do |cnty|
@@ -1258,6 +1324,19 @@ task :visitor_identities=>:environment do
"RMRV@HI.M".ts_eval= %Q|"RMRV@HI.M".tsn.load_mean_corrected_sa_from "/Volumes/UHEROwork/data/tour/seasadj/sadata.xls", "sadata"|
"RMRV@HI.M".ts_eval= %Q|"RMRV@HI.M".ts.apply_seasonal_adjustment :additive|
+
+ ["HON", "KAU", "MAU", "HAW"].each do |cnty|
+ "OCUP%@#{cnty}.M".ts_eval= %Q|"OCUP%@HI.M".ts.mc_ma_county_share_for("#{cnty}","OCUP%")|
+ "RMRV@#{cnty}.M".ts_eval= %Q|"RMRV@HI.M".ts.mc_ma_county_share_for("#{cnty}","RMRV")|
+ "PRM@#{cnty}.M".ts_eval= %Q|"PRM@HI.M".ts.mc_ma_county_share_for("#{cnty}","PRM")|
+ "OCUP%@#{cnty}.Q".ts_eval= %Q|"OCUP%@#{cnty}.M".ts.aggregate(:quarter, :average)|
+ "RMRV@#{cnty}.Q".ts_eval= %Q|"RMRV@#{cnty}.M".ts.aggregate(:quarter, :average)|
+ "PRM@#{cnty}.Q".ts_eval= %Q|"PRM@#{cnty}.M".ts.aggregate(:quarter, :average)|
+ "OCUP%@#{cnty}.A".ts_eval= %Q|"OCUP%@#{cnty}.M".ts.aggregate(:year, :average)|
+ "RMRV@#{cnty}.A".ts_eval= %Q|"RMRV@#{cnty}.M".ts.aggregate(:year, :average)|
+ "PRM@#{cnty}.A".ts_eval= %Q|"PRM@#{cnty}.M".ts.aggregate(:year, :average)|
+ end
+
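Note that this block aggregates with :average rather than the :sum used for visitor counts and visitor days earlier in the task, since averaging is the appropriate way to roll up rates like occupancy. For example, both of these lines are instances of loops in this commit:

    "OCUP%@HON.Q".ts_eval= %Q|"OCUP%@HON.M".ts.aggregate(:quarter, :average)|  # rate: average the months
    "VISJP@HON.Q".ts_eval= %Q|"VISJP@HON.M".ts.aggregate(:quarter, :sum)|      # count: sum the months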
"TRMS@HI.A".ts_eval= %Q|Series.load_from_file("/Volumes/UHEROwork/data/rawdata/manual/trms.xls", {:file_type => "xls", :start_date => "1964-01-01", :sheet => "trms", :row => "increment:2:1", :col => 2, :frequency => "A" })|
"TRMS@HON.A".ts_eval= %Q|Series.load_from_file("/Volumes/UHEROwork/data/rawdata/manual/trms.xls", {:file_type => "xls", :start_date => "1964-01-01", :sheet => "trms", :row => "increment:2:1", :col => 3, :frequency => "A" })|