28 changes: 16 additions & 12 deletions doc/KNNClassifier.rst
@@ -1,35 +1,39 @@
:digest: Classify data with K Nearest Neighbours
:digest: Classification with K Nearest Neighbours
:species: data
:sc-categories: Classification, KNN
:sc-related: Classes/FluidKNNRegressor, Classes/FluidDataSet, Classes/FluidLabelSet
:sc-related: Classes/FluidKNNRegressor, Classes/FluidDataSet, Classes/FluidLabelSet, Classes/MLPClassifier
:see-also:
:description: A nearest-neighbour classifier using :fluid-obj:`KDTree` . Each point is assigned the class that is most common among its nearest neighbours. https://scikit-learn.org/stable/modules/neighbors.html#classification
:description: A nearest-neighbour classifier using a :fluid-obj:`KDTree` .

:discussion:

In order to make predictions, the KNNClassifier must first be ``fit`` with a :fluid-obj:`DataSet` of data points and a target :fluid-obj:`LabelSet` with a label for each point in the DataSet (by means of a shared identifier).

To classify a point, the ``numNeighbours`` nearest neighbours of the incoming point are found, and whichever class is most common among them is predicted as the class for the point. If ``numNeighbours`` is even and the vote is tied, the label of the closer point will be predicted.
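
A minimal sketch of that workflow (assuming a booted server ``s``; the data points, identifiers and class names are purely illustrative, and passing ``numNeighbours`` at creation is an assumption based on how other FluCoMa objects take their controls):

code::
// a minimal sketch: fit a KNNClassifier on a tiny hand-made DataSet and LabelSet,
// then classify a new point (evaluate block by block)
(
~data = FluidDataSet(s);
~labels = FluidLabelSet(s);
// three 2D points; the identifiers "a", "b", "c" are shared with the LabelSet
~data.load(Dictionary.with(*[
	\cols -> 2,
	\data -> Dictionary.newFrom(["a", [0.0, 0.0], "b", [1.0, 1.0], "c", [0.95, 0.95]])
]));
~labels.addLabel("a", "low");
~labels.addLabel("b", "high");
~labels.addLabel("c", "high");
)

// fit: labels are matched to data points through the shared identifiers
~classifier = FluidKNNClassifier(s, numNeighbours: 3); // numNeighbours at creation is an assumption
~classifier.fit(~data, ~labels, action: {"fitted".postln});

// classify: the query point lives in a buffer with one value per dimension of the DataSet
~query = Buffer.loadCollection(s, [0.2, 0.2]);
~classifier.predictPoint(~query, {|label| label.postln}); // with default distance weighting, the nearby "low" point dominates
::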

:control numNeighbours:

the number of neighours to consider
The number of neighbours to consider

:control weight:

true / false: whether the neighbours should be weighted by distance

Whether the neighbours should be weighted by their distance so that closer points have more influence over determining the class. The default is 1 (true).
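
A hypothetical sketch of the difference, reusing ``~data`` and ``~labels`` from the discussion above: two "high" points sit far from the query and one "low" point sits nearby, so equal weighting lets the majority win while distance weighting favours the nearby point (passing ``weight`` at creation is an assumption, as above):

code::
// compare distance-weighted and unweighted voting on the same training data
(
~weighted = FluidKNNClassifier(s, numNeighbours: 3, weight: 1).fit(~data, ~labels);
~unweighted = FluidKNNClassifier(s, numNeighbours: 3, weight: 0).fit(~data, ~labels);
~query = Buffer.loadCollection(s, [0.2, 0.2]);
~weighted.predictPoint(~query, {|label| "weighted: %".format(label).postln});     // nearby "low" point dominates
~unweighted.predictPoint(~query, {|label| "unweighted: %".format(label).postln}); // majority vote favours "high"
)
::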

:message fit:

:arg dataSet: Source data
:arg dataSet: Source :fluid-obj:`DataSet`

:arg labelSet: Labels for the source data
:arg labelSet: A :fluid-obj:`LabelSet` of labels for the source ``dataSet``

:arg action: Run when done

Fit the model to a source :fluid-obj:`DataSet` and a target :fluid-obj:`LabelSet`. These need to be the same size
Fit the model to a source :fluid-obj:`DataSet` and a target :fluid-obj:`LabelSet`. The labels in the :fluid-obj:`LabelSet` correspond to the data points in the :fluid-obj:`DataSet` by means of a shared identifier.

:message predict:

:arg dataSet: data to predict labels for
:arg dataSet: :fluid-obj:`DataSet` of data points to predict labels for

:arg labelSet: place to write labels
:arg labelSet: :fluid-obj:`LabelSet` to write the predicted labels into

:arg action: Run when done

@@ -41,4 +45,4 @@

:arg action: Run when done, passes predicted label as argument

Given a fitted model, predict labels for a data point in a |buffer| and return these to the caller
Given a fitted model, predict a label for the data point in ``buffer`` and return it to the caller
203 changes: 93 additions & 110 deletions example-code/sc/KNNClassifier.scd
@@ -1,133 +1,116 @@

strong::Using MFCC analyses to classify between bells and piano::
code::


// Make:
// - A KNN Classifier
// - A DataSet of example points, and a LabelSet of corresponding labels
// - A DataSet of test data and a LabelSet for predicted labels

(
~classifier = FluidKNNClassifier(s);
~source= FluidDataSet(s);
~labels = FluidLabelSet(s);
~test = FluidDataSet(s);
~mapping = FluidLabelSet(s);
)

//Make some clumped 2D points and place into a DataSet
(
~examplepoints = [[0.5,0.5],[-0.5,0.5],[0.5,-0.5],[-0.5,-0.5]];
~examplelabels = [\red,\orange,\green,\blue];
d = Dictionary.new;
d.add(\cols -> 2);
d.add(\data -> Dictionary.newFrom(~examplepoints.collect{|x, i|[i.asString, x]}.flatten));
~source.load(d);
~examplelabels.collect{|x,i| ~labels.addLabel(i, x);};
// source sounds
~bells = Buffer.readChannel(s,FluidFilesPath("Tremblay-CF-ChurchBells.wav"),channels:[0]);
~piano = Buffer.readChannel(s,FluidFilesPath("Tremblay-SA-UprightPianoPedalWide.wav"),channels:[0]);

// split into a training set (~80% of the data) and a testing set (~20% of the data)
// for more info about the training-testing split, visit https://learn.flucoma.org/learn/training-testing-split
~train_ds = FluidDataSet(s);
~train_ls = FluidLabelSet(s);
~test_ds = FluidDataSet(s);
~test_ls = FluidLabelSet(s);
)

//Make some random, but clustered test points
// analyse
(
~testpoints = (4.collect{
64.collect{(1.sum3rand) + [1,-1].choose}.clump(2)
}).flatten(1) * 0.5;
d = Dictionary.with(
*[\cols -> 2,\data -> Dictionary.newFrom(
~testpoints.collect{|x, i| [i, x]}.flatten)]);
~test.load(d);

)


//Fit the classifier to the example DataSet and LabelSet, and then run prediction on the test data into our mapping LabelSet
(
~classifier.fit(~source,~labels);
~classifier.predict(~test, ~mapping, 1);
)

//Return labels of clustered points - wait for the dump to be done
(
~assignments = Array.new(~testpoints.size);
fork{
~testpoints.do{|x,i|
~mapping.getLabel(i, action:{|l|
~assignments.add(l);
});
s.sync;
if(i==(~testpoints.size - 1)){"Got assignments".postln;}
};
~assignments.postln;
var mfccbuf = Buffer(s);
var flatbuf = Buffer(s);
[~bells,~piano].do{
arg buf;
var label = PathName(buf.path).fileNameWithoutExtension;
FluidBufMFCC.processBlocking(s,buf,features:mfccbuf,startCoeff:1);
s.sync;
mfccbuf.numFrames.do{
arg i;
var id = "%-%".format(label,i);
FluidBufFlatten.processBlocking(s,mfccbuf,i,1,destination:flatbuf);

// about 80% of the data points will end up in the training data,
// about 20% of the data points will end up in the testing data
if(0.8.coin){
~train_ds.addPoint(id,flatbuf);
~train_ls.addLabel(id,label);
}{
~test_ds.addPoint(id,flatbuf);
~test_ls.addLabel(id,label);
};
};
};
mfccbuf.free;
flatbuf.free;
~train_ds.print;
~train_ls.print;
~test_ds.print;
~test_ls.print;
}
)

//Visualise: we're hoping to see colours neatly mapped to quandrants...
// fit the KNNClassifier and make predictions
(
c = IdentityDictionary();

c.add(\red->Color.red);
c.add(\blue->Color.blue);
c.add(\green->Color.green);
c.add(\orange-> Color.new255(255, 127, 0));

e = 200 * ((~examplepoints + 1) * 0.5).flatten(1).unlace;
d = ((~testpoints + 1) * 0.5).flatten(1).unlace;
// d = [20.collect{1.0.rand}, 20.collect{1.0.rand}];
w = Window("scatter", Rect(128, 64, 200, 200));
~colours = [Color.blue,Color.red,Color.green,Color.magenta];
w.drawFunc = {
Pen.use {
e[0].size.do{|i|
var r = Rect(e[0][i],e[1][i],10,10);
Pen.fillColor = c[~examplelabels[i]];
Pen.fillOval(r);
};
d[0].size.do{|i|
var x = (d[0][i]*200);
var y = (d[1][i]*200);
var r = Rect(x,y,5,5);
Pen.fillColor = c[~assignments[i].asSymbol].alpha_(0.3);
Pen.fillOval(r);
}
}
};
w.refresh;
w.front;
~classifier = FluidKNNClassifier(s).fit(~train_ds,~train_ls);
~predictions_ls = FluidLabelSet(s);
~classifier.predict(~test_ds,~predictions_ls);
~test_ls.dump({
arg train_ls_dict;
~predictions_ls.dump({
arg pred_ls_dict;
var n_wrong = 0;
train_ls_dict["data"].keysValuesDo{
arg id, expect;
var pred = pred_ls_dict["data"][id][0][12..];
expect = expect[0][12..];
"id: %\nexpected : %\npredicted: %\n".format(id,expect,pred).postln;
if(expect != pred){n_wrong = n_wrong + 1};
};
"number wrong: %".format(n_wrong).postln;
});
});
)

// single point prediction on arbitrary value
~inbuf = Buffer.loadCollection(s,0.5.dup);
~classifier.predictPoint(~inbuf,{|x|x.postln;});
::

subsection::Server Side Queries
This is the equivalent of predictPoint, but wholly on the server
strong::Predict a single point in a buffer::
code::

//Generate a random point and sends a trigger to query, and return the class that point matches
// analyse a random part of the bells and make a prediction
(
{
var trig = Impulse.kr(5);
var point = WhiteNoise.kr(1.dup);
var inputPoint = LocalBuf(2);
var outputPoint = LocalBuf(1);
Poll.kr(trig, point, [\pointX,\pointY]);
point.collect{ |p,i| BufWr.kr([p],inputPoint,i)};
~classifier.kr(trig,inputPoint,outputPoint);
Poll.kr(trig,BufRd.kr(1,outputPoint,0,interpolation:0),\cluster);
}.play;
~features = Buffer(s);
~flatbuf = Buffer(s);
~pred_location = rrand(0,~bells.numFrames-1024);
FluidBufMFCC.processBlocking(s,~bells,~pred_location,1024,features:~features,startCoeff:1);
FluidBufFlatten.processBlocking(s,~features,1,1,destination:~flatbuf); // ~features has 3 frames, pull out the middle frame
~classifier.predictPoint(~flatbuf,{
arg pred;
"prediction at location: %".format(~pred_location).postln;
pred.postln;
"".postln;
});
)

// to sonify the output, here are random values alternating quadrant.
::
strong::Server Side Queries::
This is the equivalent of predictPoint, but entirely on the server
code::

// Play the audio and make a prediction on the server
// The integer reported as the predicted class corresponds to the order (zero-counting) in which the
// labels were introduced to the training LabelSet. Because we processed the bells first, the 0s here
// indicate a prediction of bells, while the 1s indicate piano.
(
{
var trig = Impulse.kr(MouseX.kr(0,1).exprange(0.5,ControlRate.ir /2).poll(trig:2, label: "Query Frequency"));
var step = Stepper.kr(trig,max:3);
var point = TRand.kr(-0.1, [0.1, 0.1], trig) + [step.mod(2).linlin(0,1,-0.6,0.6),step.div(2).linlin(0,1,-0.6,0.6)] ;
var inputPoint = LocalBuf(2);
var outputPoint = LocalBuf(1);
point.collect{|p,i| BufWr.kr([p],inputPoint,i)};
~classifier.kr(trig,inputPoint,outputPoint);
SinOsc.ar((BufRd.kr(1,outputPoint,0,interpolation:0) + 69).midicps, mul: 0.1);
}.play
var sig = PlayBuf.ar(1,[~bells,~piano],BufRateScale.ir([~bells,~piano]),loop:1);
var src = SelectX.ar(ToggleFF.kr(Dust.kr(2)).lag(0.03),sig);
var mfccs = FluidMFCC.kr(src,startCoeff:1);
var predict_trig = Impulse.kr(10);
var mfccbuf = LocalBuf(13);
var predbuf = LocalBuf(1);
FluidKrToBuf.kr(mfccs,mfccbuf);
~classifier.kr(predict_trig,mfccbuf,predbuf);
FluidBufToKr.kr(predbuf).poll(label:"prediction");
src.dup;
}.play;
)
::