Permalink
Browse files

got the outlier test working, but it is too sensitive and breaks on a lot of cases
  • Loading branch information...
1 parent baa8b8b commit 0ea44da1a68084d26557b0f9e5d8643080290fb2 @bentut committed Mar 15, 2013
View
@@ -4,6 +4,7 @@ def moving_average_for_sa(start_date_string = self.data.keys.sort[0])
end
def ma_series_data(ma_type_string = "ma", start_date_string = self.data.keys.sort[0], end_date_string = Time.now.to_date.to_s)
+ return {} if start_date_string.nil?
trimmed_data = get_values_after((Date.parse(start_date_string) << 1).to_s, end_date_string)
new_series_data = {}
position = 0
@@ -26,6 +27,7 @@ def ma_series(ma_type_string = "ma", start_date_string = self.data.keys.sort[0],
# Number of observations in the moving-average window for this series.
#
# Returns 12 for monthly series, 4 for quarterly and annual series, and nil
# for any other frequency.
def window_size
  case frequency
  when "month" then 12
  when "quarter", "year" then 4
  end
end
View
@@ -20,5 +20,59 @@ def standard_deviation
return Math.sqrt(self.variance)
end
# Flags observations whose residual lies more than 2.5 residual standard
# deviations above or below the average residual.
#
# Returns a Hash of date string => value (taken from `data`) for every
# flagged date. Returns {} when `data` is empty, and also when an error is
# raised during the computation (the error is printed, not propagated).
def outlier
  return {} if data.empty?

  expected_residuals = average_residuals
  band = 2.5 * standard_deviation_residuals
  actual_residuals = residuals

  expected_residuals.each_with_object({}) do |(date_string, expected), flagged|
    observed = actual_residuals[date_string]
    # A date without a residual cannot be judged; skipping it keeps one gap
    # from raising and discarding the results for every other date.
    next if observed.nil?

    flagged[date_string] = data[date_string] if observed > band + expected || observed < expected - band
  end
rescue StandardError => e
  # Best-effort: report which series failed (and why) and carry on.
  puts "--------error: #{name}---------"
  puts "#{e.class}: #{e.message}"
  {}
end
+
# Difference between each observation and the backward-looking moving
# average for the same date.
#
# Observations are taken from the 4th date onward for quarterly and annual
# series and from the 12th date onward for monthly series, through the last
# date in `data`.
#
# Returns a Hash of date string => (value - moving average). Dates that lack
# either an observation or a moving-average value are left out, so the result
# never mixes raw levels in with residuals. Returns {} when the frequency is
# not month/quarter/year or the series has too few observations.
def residuals
  start_index = case frequency
                when "quarter", "year" then 3
                when "month" then 11
                end
  sorted_dates = data.keys.sort
  start_date_string = start_index && sorted_dates[start_index]
  return {} if start_date_string.nil?

  moving_average_data = backward_looking_moving_average.data
  trimmed_data = get_values_after_including(start_date_string, sorted_dates.last)

  trimmed_data.each_with_object({}) do |(date_string, value), residual_data|
    average = moving_average_data[date_string]
    next if value.nil? || average.nil?

    residual_data[date_string] = value - average
  end
end
+
# Mean of all residuals, repeated for every date of the backward-looking
# moving average.
#
# Returns a Hash of date string => mean residual (a Float), keyed by the
# moving average's dates. Returns {} when there are no residuals to average.
def average_residuals
  # Sum in date order so the floating-point result is reproducible.
  values = residuals.sort.map { |_date_string, residual| residual }
  return {} if values.empty?

  # Seed with 0.0 so integer inputs are not truncated by integer division.
  mean = values.inject(0.0, :+) / values.count

  backward_looking_moving_average.data.keys.each_with_object({}) do |date_string, averages|
    averages[date_string] = mean
  end
end
+
# Sample standard deviation (n - 1 denominator) of the residuals.
#
# Returns a Float. With fewer than two residuals the sample standard
# deviation is undefined and Float::NAN is returned; every comparison against
# NaN is false, so a caller testing values against a band built from it flags
# nothing.
def standard_deviation_residuals
  # Sum in date order so the floating-point result is reproducible.
  values = residuals.sort.map { |_date_string, residual| residual }
  return Float::NAN if values.count < 2

  # Float seeds keep integer inputs from truncating the mean and variance.
  mean = values.inject(0.0, :+) / values.count
  squared_deviations = values.inject(0.0) { |total, value| total + (value - mean)**2 }
  Math.sqrt(squared_deviations / (values.count - 1))
end
end
View
@@ -219,15 +219,13 @@
"CPICORE@US.A".ts_eval= %Q|"CPICORENS@US.M".ts.aggregate(:year, :average)|
"CPICORE@US.M".ts_eval= %Q|"CPICORE@US.M".tsn.load_from_bls("CUSR0000SA0L1E", "M")|
"CPICORE@US.Q".ts_eval= %Q|"CPICORE@US.M".ts.aggregate(:quarter, :average)|
-"CSCFNS@JP.Q".ts_eval= %Q|Series.load_from_download "CSCFNS@esri.cao.go.jp", { :file_type => "xls", :start_date => "1982-04-01", :end_date=>"2004-01-01", :sheet => "sheet_num:1", :row => "increment:7:1", :col => 2, :frequency => "Q" }|
"CSCFNS@JP.Q".ts_eval= %Q|Series.load_from_download "CSCFNS@esri.cao.go.jp", { :file_type => "xls", :start_date => "2004-04-01", :sheet => "sheet_num:1", :row => "increment:97:3", :col => 2, :frequency => "Q" }|
-"CSCFNS@JP.Q".ts_eval= %Q|"CSCFNS@JP.Q".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/jp_upd_q.xls"|
+"CSCFNS@JP.Q".ts_eval= %Q|Series.load_from_download "CSCFNS@esri.cao.go.jp", { :file_type => "xls", :start_date => "1982-04-01", :end_date=>"2004-01-01", :sheet => "sheet_num:1", :row => "increment:7:1", :col => 2, :frequency => "Q" }|
"CSCF@JP.A".ts_eval= %Q|"CSCFNS@JP.Q".ts.aggregate(:year, :average)|
"CSCF@JP.Q".ts_eval= %Q|Series.load_from_download "CSCF@esri.cao.go.jp", { :file_type => "xls", :start_date => "1982-04-01", :end_date=>"2004-01-01", :sheet => "sheet_num:1", :row => "increment:7:1", :col => 2, :frequency => "Q" }|
"CSCF@JP.Q".ts_eval= %Q|Series.load_from_download "CSCF@esri.cao.go.jp", { :file_type => "xls", :start_date => "2004-04-01", :sheet => "sheet_num:1", :row => "increment:97:3", :col => 2, :frequency => "Q" }|
-"CSCF@JP.Q".ts_eval= %Q|"CSCF@JP.Q".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/jp_upd_q.xls"|
"CSCFNS@JP.M".ts_eval= %Q|Series.load_from_download "CSCFNS@esri.cao.go.jp", { :file_type => "xls", :start_date => "2004-04-01", :sheet => "sheet_num:1", :row => "increment:95:1", :col => 2, :frequency => "M" }|
-"CSCFNS@JP.M".ts_eval= %Q|"CSCFNS@JP.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/jp_upd_m.xls"|
+"CSCFNS@JP.M".ts_eval= %Q| "CSCFNS@JP.M".tsn.load_from("/Volumes/UHEROwork/data/rawdata/History/jp_upd_m.xls" ).trim("2004-03-01","2004-03-01")|
"E&@HAW.A".ts_eval= %Q|"E&@HAW.A".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/esic_CNTY_a.xls"|
"E&@HAW.M".ts_eval= %Q|"E&@HAW.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/esic_CNTY_m.xls"|
"E&@HAW.Q".ts_eval= %Q|"E&@HAW.Q".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/esic_CNTY_q.xls"|
@@ -2512,8 +2510,7 @@
"EMPL@HON.M".ts_eval= %Q|"EMPL@HI.M".ts.share_using("EMPLNS@HON.M".ts.backward_looking_moving_average.trim,"EMPLNS@HI.M".ts.backward_looking_moving_average.trim)|
"EMPL@HON.M".ts_eval= %Q|"EMPL_MC@HI.M".ts.share_using("EMPLNS@HON.M".ts, "EMPLNS@HI.M".ts)|
"EMPL@HON.Q".ts_eval= %Q|"EMPL@HON.M".ts.aggregate(:quarter, :average)|
-"EMPL@JP.M".ts_eval= %Q|Series.load_from_download("LF@stat.go.jp", { :file_type => "xls", :start_row => 10, :start_col => 3, :sheet => "Table 18", :row => "increment:10:1", :col => 7, :frequency => "M" }) * 10|
-"EMPL@JP.M".ts_eval= %Q|"EMPL@JP.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/jp_upd_m.xls"|
+"EMPL@JP.M".ts_eval= %Q|Series.load_from_download( "EMPL_HIST@stat.go.jp", { :file_type=>"xls", :start_date=>"1953-01-01", :sheet=>"sheet_num:1", :row=>"increment:11:1", :col=>"5", :frequency=>"M" }) * 10|
"EMPL@JP.Q".ts_eval= %Q|"EMPL@JP.M".ts.aggregate(:quarter, :average)|
"EMPL@JP.A".ts_eval= %Q|"EMPL@JP.Q".ts.aggregate(:year, :average)|
"EMPLNS@KAU.M".ts_eval= %Q|"EMPLNS@KAU.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/bls_histextend_date_format_correct.xls"|
@@ -3985,8 +3982,7 @@
"E_NF&@MAU.M".ts_eval= %Q|"E_NF&@MAU.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/esic_CNTY_m.xls"|
"E_NF&@MAU.Q".ts_eval= %Q|"E_NF&@MAU.Q".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/esic_CNTY_q.xls"|
"E_NF@HI.A".ts_eval= %Q|"E_NFNS@HI.M".ts.aggregate(:year, :average)|
-"E_NF@JP.M".ts_eval= %Q|Series.load_from_download("LF@stat.go.jp", { :file_type => "xls", :start_row => 10, :start_col => 3, :sheet => "Table 18", :row => "increment:10:1", :col => 14, :frequency => "M" }) * 10|
-"E_NF@JP.M".ts_eval= %Q|"E_NF@JP.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/jp_upd_m.xls"|
+"E_NF@JP.M".ts_eval= %Q| Series.load_from_download( "ENF_HIST@stat.go.jp", { :file_type=>"xls", :start_date=>"1953-01-01", :sheet=>"sheet_num:1", :row=>"increment:12:1", :col=>"11", :frequency=>"M" }) * 10|
"E_NF@JP.Q".ts_eval= %Q|"E_NF@JP.M".ts.aggregate(:quarter, :average)|
"E_NF@JP.A".ts_eval= %Q|"E_NF@JP.Q".ts.aggregate(:year, :average)|
"E_NF@NBI.A".ts_eval= %Q|"E_NF@HI.A".ts - "E_NF@HON.A".ts|
@@ -5507,8 +5503,7 @@
"LF@HON.M".ts_eval= %Q| "LF_MC@HI.M".ts.share_using("LFNS@HON.M".ts, "LFNS@HI.M".ts)|
"LF@HON.A".ts_eval= %Q|"LFNS@HON.M".ts.aggregate(:year, :average)|
"LF@HON.Q".ts_eval= %Q|"LF@HON.M".ts.aggregate(:quarter, :average)|
-"LF@JP.M".ts_eval= %Q|Series.load_from_download("LF@stat.go.jp", { :file_type => "xls", :start_row => 10, :start_col => 3, :sheet => "Table 18", :row => "increment:10:1", :col => 4, :frequency => "M" }) * 10|
-"LF@JP.M".ts_eval= %Q|"LF@JP.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/jp_upd_m.xls"|
+"LF@JP.M".ts_eval= %Q|Series.load_from_download( "LF_HIST@stat.go.jp", { :file_type=>"xls", :start_date=>"1953-01-01", :sheet=>"sheet_num:1", :row=>"increment:11:1", :col=>"5", :frequency=>"M" }) * 10|
"LF@JP.Q".ts_eval= %Q|"LF@JP.M".ts.aggregate(:quarter, :average)|
"LF@JP.A".ts_eval= %Q|"LF@JP.Q".ts.aggregate(:year, :average)|
"LFNS@KAU.M".ts_eval= %Q|"LFNS@KAU.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/bls_histextend_date_format_correct.xls"|
@@ -9236,8 +9231,7 @@
"UR@HON.A".ts_eval= %Q|"URNS@HON.M".ts.aggregate(:year, :average)|
"UR@HON.M".ts_eval= %Q|(("EMPL@HON.M".ts / "LF@HON.M".ts) * -1 + 1)*100|
"UR@HON.Q".ts_eval= %Q|"UR@HON.M".ts.aggregate(:quarter, :average)|
-"UR@JP.M".ts_eval= %Q|Series.load_from_download("LF@stat.go.jp", { :file_type => "xls", :start_row => 10, :start_col => 3, :sheet => "Table 18", :row => "increment:10:1", :col => 31, :frequency => "M" })|
-"UR@JP.M".ts_eval= %Q|"UR@JP.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/jp_upd_m.xls"|
+"UR@JP.M".ts_eval= %Q| Series.load_from_download( "UR_HIST@stat.go.jp", { :file_type=>"xls", :start_date=>"1953-01-01", :sheet=>"sheet_num:1", :row=>"increment:11:1", :col=>"5", :frequency=>"M" })|
"UR@JP.Q".ts_eval= %Q|"UR@JP.M".ts.aggregate(:quarter, :average)|
"UR@JP.A".ts_eval= %Q|"UR@JP.Q".ts.aggregate(:year, :average)|
"URNS@KAU.M".ts_eval= %Q|"URNS@KAU.M".tsn.load_from "/Volumes/UHEROwork/data/rawdata/History/bls_histextend_date_format_correct.xls"|
Oops, something went wrong.

0 comments on commit 0ea44da

Please sign in to comment.