# atduskgreg/Processing-BayesianExamples

### Subversion checkout URL

You can clone with
or
.

initial commit

commit 8ae533416dfe800322cbb2a83d4b15f420c1db90 0 parents
Greg Borenstein authored
 @@ -0,0 +1,21 @@ +## Examples of Applying Bayes Rule in Processing + +This repository collects a series of Processing sketches demonstrating the use of Bayes Rule. It was created as part of [Makematics at ITP in Fall 2012](http://makematics.com/syllabus/2012-fall). + +What follows is a description of the sketches contained herein: + +### A Basic Demonstration of Bayes Rule + +Expressed in the classic form of an imperfect test for a rare disease. Calculates all four probabilities possible with the two different test results. + +### Bayesian Localization + +A demonstration of determining the position of a "robot" in 1 dimension given a set of uncertain sensor readings. + + + +This was adapted from Unit 1 of Sebastian Thrun's [Udacity Self-Driving Car class](http://www.udacity.com/overview/Course/cs373/CourseRev/apr2012) + +### Bayesian Spam Filtering + +An implementation of Paul Graham's classic technique from [A Plan for Spam](http://www.paulgraham.com/spam.html). Adapted from [this Java implementation by Dan Shiffman](http://www.shiffman.net/teaching/a2z/bayesian/).
22 basic_bayes_rule/basic_bayes_rule.pde
 @@ -0,0 +1,22 @@ +float frequencyOfDisease = 0.01; + +float accuracyOfTest = 0.95; + +float totalPositiveProbability = (accuracyOfTest*frequencyOfDisease) + (1-accuracyOfTest)*(1-frequencyOfDisease); + +float probabilityOfDisesaeGivenPositiveTest = (accuracyOfTest*frequencyOfDisease)/totalPositiveProbability; +float probabilityOfHealthGivenPositiveTest = ((1-accuracyOfTest)*(1-frequencyOfDisease))/totalPositiveProbability; + +float totalNegativeProbability = (accuracyOfTest * (1- frequencyOfDisease)) + (1-accuracyOfTest)*(frequencyOfDisease); + + +float probabilityOfDiseaseGivenNegativeTest = ((1-accuracyOfTest)*frequencyOfDisease)/totalNegativeProbability; +float probabilityOfHealthGivenNegativeTest = (accuracyOfTest*(1-frequencyOfDisease))/totalNegativeProbability; + +println("probabilityOfDisesaeGivenPositiveTest: " + probabilityOfDisesaeGivenPositiveTest); +println("probabilityOfHealthGivenPositiveTest: " + probabilityOfHealthGivenPositiveTest); +println(probabilityOfHealthGivenPositiveTest + probabilityOfDisesaeGivenPositiveTest); + +println("probabilityOfDiseaseGivenNegativeTest: " + probabilityOfDiseaseGivenNegativeTest); +println("probabilityOfHealthGivenNegativeTest: " + probabilityOfHealthGivenNegativeTest); +println(probabilityOfDiseaseGivenNegativeTest + probabilityOfHealthGivenNegativeTest);
176 bayesian_localization/bayesian_localization.pde
 @@ -0,0 +1,176 @@ +boolean[] cells; +float[] probabilities; + +int numCells = 10; +int currentPosition; +int doorWidth = 20; +int doorHeight = 50; +// inaccurate senor probabilites +float hitError = 0.6; +float missError = 0.2; + +boolean showPosition = false; + +void setup() { + size(400, 400); + cells = new boolean[numCells]; + probabilities = new float[numCells]; + for (int i = 0; i < cells.length; i++) { + cells[i] = (random(1) > 0.7); + probabilities[i] = 1.0/cells.length; + } + + currentPosition = (int)random(numCells); +} + +void draw() { + background(255); + drawSensorReading(); + + if(showPosition){ + fill(0); + text("True position: " + currentPosition, 50, 200); + } + + translate(10, 10); + drawCells(); + + translate(0, 150); + drawProbabilities(); +} +void sense(boolean sensorReading){ + float[] newProbs = new float[probabilities.length]; + + for(int i = 0; i < cells.length; i++) { + if(cells[i] == sensorReading){ + newProbs[i] = probabilities[i] * hitError; + } else { + newProbs[i] = probabilities[i] * missError; + } + } + + probabilities = newProbs; + + // now we have to normalize, i.e. 
+ // make sure all the probabilities add up to 1 + float sum = 0; + for(float i : probabilities){ + sum += i; + } + + for(int i = 0; i < probabilities.length; i++){ + probabilities[i] /= sum; + } +} + +void move(int move){ + boolean[] newCells = new boolean[cells.length]; + + for(int i = 0; i < cells.length; i++){ + int j = (i+(cells.length + move)) % cells.length; + + newCells[i] = cells[j]; + } + + cells = newCells; +} + + + +void drawProbabilities(){ + pushMatrix(); + pushStyle(); + stroke(0); + noFill(); + float scale = 0.8; + scale(scale); + + beginShape(); + for(int i = 0; i < cells.length; i++) { + float doorX = (doorWidth+5) * i * 1/scale; + vertex(doorX+doorWidth/2, probabilities[i]*-100); + pushStyle(); + fill(125); + text(nf(probabilities[i],1,2), doorX, 15); + popStyle(); + } + endShape(); + popStyle(); + popMatrix(); +} + +void drawSensorReading() { + pushMatrix(); + pushStyle(); + translate(width-100, 10); + + fill(0); + text("SENSOR", 0, 10); + noFill(); + stroke(0); + rect(0, 20, 90, 100); + + if (takeSensorReading()) { + pushMatrix(); + translate(30, 30); + drawDoor(); + popMatrix(); + } + + popStyle(); + popMatrix(); +} + +boolean takeSensorReading() { + // perfect measurement + return cells[currentPosition]; +} + +void drawCells() { + + pushStyle(); + + pushMatrix(); + for (int i = 0; i < cells.length; i++) { + int doorX = (doorWidth+5) * i; + + if (cells[i]) { + pushMatrix(); + translate(doorX, 0); + drawDoor(); + popMatrix(); + } + fill(0); + text(i, doorX-5+doorWidth/2, + doorHeight+15); + } + popMatrix(); + popStyle(); +} + +void drawDoor() { + pushMatrix(); + pushStyle(); + fill(110, 50, 50); + stroke(0); + rect(0, 0, doorWidth, doorHeight); + popStyle(); + popMatrix(); +} + +void keyPressed(){ + if(key == ' '){ + sense(takeSensorReading()); + } + + if(key == '='){ + move(1); + } + if(key == '-'){ + move(-1); + } + + if(key == 'r'){ + showPosition = !showPosition; + } + saveFrame("bayesian-localization-####.png"); +}
212 bayesian_spam_filtering/SpamFilter.pde
76 bayesian_spam_filtering/Word.pde
31 bayesian_spam_filtering/bayesian_spam_filtering.pde
 @@ -0,0 +1,31 @@ + +void setup() { + // Create a new SpamFilter Object + SpamFilter filter = new SpamFilter(); + + // Train spam with a file of spam e-mails + filter.trainSpam("spam.txt"); + // Train spam with a file of regular e-mails + filter.trainGood("good.txt"); + // We are finished adding words so finalize the results + filter.finalizeTraining(); + + + for (int i = 1; i < 4; i++) { + // Read in a text file + String stuff = join(loadStrings("messages/mail" + i + ".txt"), '\n'); + + // Ask the filter to analyze it + boolean spam = filter.analyze(stuff); + + // Print results + println(i); + if (spam) println("I do believe this message is spam!"); + else println("I do believe this is a genuine message!"); + } + noLoop(); +} + +void draw() { +} +
28,002 bayesian_spam_filtering/data/good.txt
28,002 additions, 0 deletions not shown
99 bayesian_spam_filtering/data/messages/mail1.txt
66 bayesian_spam_filtering/data/messages/mail2.txt
 @@ -0,0 +1,66 @@ + +Delivered-To: daniel.shiffman@gmail.com +Received: by 10.82.105.10 with SMTP id d10cs851587buc; + Thu, 18 Jan 2007 17:51:15 -0800 (PST) +Received: by 10.65.219.6 with SMTP id w6mr2040312qbq.1169171474751; + Thu, 18 Jan 2007 17:51:14 -0800 (PST) +Return-Path: <1-208237-shiffman.net?daniel@mx27.sitecartooncodes.com> +Received: from looneymail-mx3.dreamhost.com (sd-green-bigip-119.dreamhost.com [208.97.132.119]) + by mx.google.com with ESMTP id f13si1797058qba.2007.01.18.17.51.13; + Thu, 18 Jan 2007 17:51:14 -0800 (PST) +Received-SPF: neutral (google.com: 208.97.132.119 is neither permitted nor denied by best guess record for domain of 1-208237-shiffman.net?daniel@mx27.sitecartooncodes.com) +DomainKey-Status: bad (test mode) +Received: from mx27.sitecartooncodes.com (mx27.sitecartooncodes.com [216.22.37.151]) + by looneymail-mx3.dreamhost.com (Postfix) with SMTP id 332C28966 + for ; Thu, 18 Jan 2007 17:51:08 -0800 (PST) +Comment: DomainKeys? See http://antispam.yahoo.com/domainkeys +DomainKey-Signature: a=rsa-sha1; q=dns; c=simple; + s=s512; d=sitecartooncodes.com; + b=upNDK5xAITQfzQwR9QDHcSPYj65Z1pcbDMKsqKd2QeJ37MUt+4qfn7CyuZyJt0+2KJYPRGQMuzKjaAJGW02X+g==; +Received: from mx27.sitecartooncodes.com [216.22.37.151] by sitecartooncodes.com [216.22.37.151]; + Thu, 18 Jan 2007 19:15:12 EST +MIME-Version: 1.0 +From: Career Center +To: daniel@shiffman.net +Subject: Applicants Needed +Date: Thu, 18 Jan 2007 19:15:12 EST +Message-ID: <1-208237-3eBI4TXnSm4UU1BIYITs@mx27.sitecartooncodes.com> +X-Mailer: 3.2.12-35 [Dec 13 2006, 12:56:48] +Content-Type: text/plain; charset="ISO-8859-1" +Content-Transfer-Encoding: 7bit + +Re: Applicants Needed ! + +http://sitecartooncodes.com/ardyh?e=5qrK3bGNgS3mmCrK9Kbw&m=208237&l=0 + + +Card Processors Needed ASAP, Work This Week! 
+ +http://sitecartooncodes.com/ardyh?e=2x51vAJRUevGGj5181A.&m=208237&l=0 + + + + + + + + + + + + + +To Leave BCU: + +http://sitecartooncodes.com/imys?m=208237&l=1 + + +or write to: BCU/16192/Coastal/Highway/Lewes/DE/19958 + +If you no longer want these: +http://sitecartooncodes.com/unsub.php?e=daniel@shiffman.net&m=208237 +270_Weeks_Ave._Manorville_NY_11949 + + +<1;0d4ebck9Nzb33Q4eUecr;208237> +
80 bayesian_spam_filtering/data/messages/mail3.txt
 @@ -0,0 +1,80 @@ + +Delivered-To: daniel.shiffman@gmail.com +Received: by 10.82.108.20 with SMTP id g20cs121270buc; + Wed, 7 Feb 2007 19:51:57 -0800 (PST) +Received: by 10.114.152.17 with SMTP id z17mr3040680wad.1170906716548; + Wed, 07 Feb 2007 19:51:56 -0800 (PST) +Return-Path: <1-2066289-shiffman.net?daniel@mls5.bigpixel-central.com> +Received: from looneymail-mx3.g.dreamhost.com (sd-green-bigip-177.dreamhost.com [208.97.132.177]) + by mx.google.com with ESMTP id 7si3370470wrh.2007.02.07.19.51.56; + Wed, 07 Feb 2007 19:51:56 -0800 (PST) +Received-SPF: neutral (google.com: 208.97.132.177 is neither permitted nor denied by best guess record for domain of 1-2066289-shiffman.net?daniel@mls5.bigpixel-central.com) +Received: from mls5.bigpixel-central.com (mls5.bigpixel-central.com [69.6.2.20]) + by looneymail-mx3.g.dreamhost.com (Postfix) with SMTP id 8DC8C85CD + for ; Wed, 7 Feb 2007 19:51:55 -0800 (PST) +Received: from mls5.bigpixel-central.com [69.6.2.20] by bigpixel-central.com [69.6.2.20]; + Wed, 7 Feb 2007 23:36:07 EST +MIME-Version: 1.0 +From: Local Satellite Source +To: daniel@shiffman.net +Subject: Your cable company doesn't want you to see this... +Date: Wed, 7 Feb 2007 23:36:07 EST +Message-ID: <1-2066289-19L6DFYJhcDHHuL646Fb@mls5.bigpixel-central.com> +X-Mailer: 3.2.12-15 [Dec 22 2006, 15:30:58] +Content-Type: text/html; charset="ISO-8859-1" +Content-Transfer-Encoding: 7bit + +
+ + + +
+
+ +
+ + + + + + + + +
+

*Offer is for new, residential customers only. Participation is subject to credit approval. Social Security Number, major valid credit card or check card and 18 month commitment to America's Top 60, 120 or 180 are required. Service must be activated at the time of installation. Dual tuner receivers may require a constant phone line +connection to eliminate additional charges. Free 3 Months of HBO only with any qualifying, core programming package (AT60/120/180). \$100 or \$200 off requires activation of AT60 or greater and the completion of redemption form provided by DISH Network. Upon acceptance of redemption form, customer will receive a \$10 or \$20 rebate for ten consecutive months +depending on programming selected and activated

+ +

+ +

+ + + +

+ +

+
+

<1;5qrK3bGNgS3mmCrK9Kbw;2066289> + +
9,345 bayesian_spam_filtering/data/spam.txt
9,345 additions, 0 deletions not shown