Permalink
Browse files

Initial commit

  • Loading branch information...
0 parents commit 3a5e293e1c35f6286814b582f5c85052fb5d3ca9 Andrew Kane committed Dec 11, 2011
Showing with 251 additions and 0 deletions.
  1. +17 −0 .gitignore
  2. +1 −0 .rspec
  3. +4 −0 Gemfile
  4. +22 −0 LICENSE
  5. +75 −0 README.md
  6. +4 −0 Rakefile
  7. +20 −0 anomaly_detector.gemspec
  8. +57 −0 lib/anomaly_detector.rb
  9. +3 −0 lib/anomaly_detector/version.rb
  10. +40 −0 spec/anomaly_detector_spec.rb
  11. +8 −0 spec/spec_helper.rb
@@ -0,0 +1,17 @@
+*.gem
+*.rbc
+.bundle
+.config
+.yardoc
+Gemfile.lock
+InstalledFiles
+_yardoc
+coverage
+doc/
+lib/bundler/man
+pkg
+rdoc
+spec/reports
+test/tmp
+test/version_tmp
+tmp
1 .rspec
@@ -0,0 +1 @@
+--color
@@ -0,0 +1,4 @@
+source 'https://rubygems.org'
+
+# Specify your gem's dependencies in anomaly_detector.gemspec
+gemspec
22 LICENSE
@@ -0,0 +1,22 @@
+Copyright (c) 2011 Andrew Kane
+
+MIT License
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,75 @@
+
+# AnomalyDetector
+
+Anomaly detection using a normal distribution.
+
+## Installation
+
+Add this line to your application's Gemfile:
+
+```ruby
+gem "anomaly_detector"
+```
+
+And then execute:
+
+```sh
+bundle install
+```
+
+## How to Use
+
+Train the detector with **only non-anomalies**. Each row is a sample.
+
+```ruby
+train_data = [
+ [0.1, 100, 1.4],
+ [0.2, 101, 2.1],
+ [0.5, 102, 1.6]
+]
+ad = AnomalyDetector.new(train_data)
+```
+
+That's it! Let's test for anomalies.
+
+```ruby
+test_sample = [1.0, 100, 1.4]
+ad.probability(test_sample)
+# => 0.0007328491480297603
+```
+
+**Super-important:** You must select a threshold for anomalies (which we denote with ε, "epsilon").
+
+Probabilities less than ε are considered anomalies. If ε is higher, more things are considered anomalies.
+
+``` ruby
+ad.anomaly?(test_sample, 1e-10)
+# => false
+ad.anomaly?(test_sample, 0.5)
+# => true
+```
+
+Here's sample code to help you find the best ε for your application.
+
+```ruby
+# TODO
+```
+
+You can easily persist the detector in a file or database.
+
+```ruby
+# TODO Finish example
+Marshal.dump(ad)
+```
+
+## Contributing
+
+1. Fork it
+2. Create your feature branch (`git checkout -b my-new-feature`)
+3. Commit your changes (`git commit -am 'Added some feature'`)
+4. Push to the branch (`git push origin my-new-feature`)
+5. Create new Pull Request
+
+## Thanks
+
+A special thanks to [Andrew Ng](http://www.ml-class.org).
@@ -0,0 +1,4 @@
+#!/usr/bin/env rake
+require "bundler/gem_tasks"
+require "rspec/core/rake_task"
+RSpec::Core::RakeTask.new("spec")
@@ -0,0 +1,20 @@
+# -*- encoding: utf-8 -*-
+require File.expand_path('../lib/anomaly_detector/version', __FILE__)
+
# Gem specification for anomaly_detector: package metadata, the packaged file
# lists (taken from git) and the development-only dependencies.
Gem::Specification.new do |gem|
  gem.name          = "anomaly_detector"
  gem.version       = AnomalyDetector::VERSION
  gem.authors       = ["Andrew Kane"]
  gem.email         = ["andrew@getformidable.com"]
  gem.description   = %q{Anomaly detection using a normal distribution.}
  gem.summary       = %q{Anomaly detection using a normal distribution.}
  gem.homepage      = "https://github.com/ankane/anomaly_detector"
  # Matches the MIT LICENSE file in the repository.
  gem.license       = "MIT"

  # File lists are read from git, so the gem must be built from a git checkout
  # and only tracked files are packaged.
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  gem.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  gem.require_paths = ["lib"]

  gem.add_development_dependency "rake"
  gem.add_development_dependency "rspec", ">= 2.0.0"
end
@@ -0,0 +1,57 @@
+require "anomaly_detector/version"
+
+begin
+ require "narray"
+ require "nmatrix"
+rescue LoadError
+ require "matrix"
+end
+
# Detects anomalies by modelling every feature (column) of the training data
# as an independent normal distribution.
#
# Only the per-feature mean and standard deviation are kept, stored as plain
# arrays so that an instance can be passed to Marshal.dump.
class AnomalyDetector
  # sqrt(2 * pi), the normalising constant of the normal density.
  SQRT2PI = Math.sqrt(2 * Math::PI)

  # Trains the detector.
  #
  # @param data [Array<Array<Numeric>>, Matrix, NMatrix] training samples,
  #   one row per sample and one column per feature
  # @raise [ArgumentError] if data contains no samples or no features
  # @raise [RuntimeError] if any feature has a zero or NaN standard deviation
  #   (e.g. a constant column, or a single training sample)
  def initialize(data)
    # Use NMatrix if possible
    if defined?(NMatrix) && (!defined?(Matrix) || !data.is_a?(Matrix))
      d = data.is_a?(NMatrix) ? data : NMatrix.to_na(data)

      # Convert these to an array for Marshal.dump
      @mean = d.mean(1).to_a
      @std = d.stddev(1).to_a
    else
      d = data.is_a?(Matrix) ? data : Matrix.rows(data)
      cols = d.column_vectors
      @mean = cols.map { |c| mean(c) }
      @std = cols.zip(@mean).map { |c, m| std(c, m) }
    end

    # Without this guard an empty data set would build a detector with zero
    # features whose probability([]) is the empty product, 1.
    if @mean.empty?
      raise ArgumentError, "data must contain at least one sample with at least one feature"
    end

    raise "Standard deviation cannot be zero" if @std.any? { |s| s == 0 || s.nan? }
  end

  # Density of the sample under the trained model: the product of the
  # per-feature normal densities.
  #
  # @param x [Array<Numeric>] one value per feature, in training column order
  # @return [Numeric] the product of the per-feature densities
  # @raise [ArgumentError] if x does not have one element per trained feature
  def probability(x)
    raise ArgumentError, "x must have #{@mean.size} elements" if x.size != @mean.size

    x.each_with_index.map { |a, i| normal_pdf(a, @mean[i], @std[i]) }.reduce(1, :*)
  end

  # Tells whether the sample is less likely than the threshold.
  #
  # @param x [Array<Numeric>] sample to test
  # @param epsilon [Numeric] threshold; densities strictly below it are
  #   reported as anomalies
  # @return [Boolean]
  def anomaly?(x, epsilon)
    probability(x) < epsilon
  end

  protected

  # Normal probability density at x for the given mean and standard deviation.
  def normal_pdf(x, mean = 0, std = 1)
    1 / (SQRT2PI * std) * Math.exp(-((x - mean)**2 / (2.0 * (std**2))))
  end

  # Not used for NArray

  # Arithmetic mean of the values in x.
  def mean(x)
    x.inject(0.0) { |a, i| a + i } / x.size
  end

  # Sample standard deviation (n - 1 denominator) of x around the given mean.
  # A single-element x divides 0.0 by 0 and therefore yields NaN.
  def std(x, mean)
    Math.sqrt(x.inject(0.0) { |a, i| a + (i - mean)**2 } / (x.size - 1))
  end
end
@@ -0,0 +1,3 @@
class AnomalyDetector
  # Gem version string; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,40 @@
+require "spec_helper"
+
# Behavioural spec for AnomalyDetector. Floating-point results are compared
# with a tolerance rather than exact equality, because the last bits depend on
# the order of operations and on which matrix back end computed the statistics.
describe AnomalyDetector do
  let(:data) { [[-1, -2], [0, 0], [1, 2]] }
  let(:ad) { AnomalyDetector.new(data) }

  # mean = [0, 0], std = [1, 2], so the density at the mean is
  # 1 / (2 * pi * 1 * 2) = 1 / (4 * pi)
  it "computes the right probability" do
    ad.probability([0, 0]).should be_within(1e-12).of(0.079577471545947667)
  end

  it "marshalizes" do
    expect { Marshal.dump(ad) }.to_not raise_error
  end

  context "when standard deviation is 0" do
    let(:data) { [[1], [1]] }

    it "raises error" do
      expect { ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
    end
  end

  context "when one training example" do
    let(:data) { [[1]] }

    it "raises error" do
      expect { ad }.to raise_error RuntimeError, "Standard deviation cannot be zero"
    end
  end

  context "when data is a matrix" do
    let(:data) { [[-1, -2], [0, 0], [1, 2]] }
    let(:sample) { [rand, rand] }

    # `ad` is trained from nested arrays and the other detector from a stdlib
    # Matrix; the two must agree up to rounding.
    it "returns the same probability as an NMatrix" do
      from_matrix = AnomalyDetector.new(Matrix.rows(data))
      ad.probability(sample).should be_within(1e-12).of(from_matrix.probability(sample))
    end
  end
end
@@ -0,0 +1,8 @@
+require "rubygems"
+require "bundler/setup"
+
+require "anomaly_detector"
+require "matrix"
+
+RSpec.configure do |config|
+end

0 comments on commit 3a5e293

Please sign in to comment.