# SMR workflows with large data

Anton Antonov  
RakuForPrediction at WordPress   
October 2025

----

## Setup

In [24]:
use Data::Reshapers;
use Data::Importers;
use Data::Summarizers;
use DSL::Translators;

use Math::SparseMatrix :ALL;
use Math::SparseMatrix::DOK;
use Math::SparseMatrix::Utilities;

use ML::SparseMatrixRecommender;

In [25]:
#% js
# Line plot of 40 random values, each drawn from [0, 10).
# NOTE(review): presumably a smoke test that JavaScript output renders in this notebook — confirm.
js-d3-list-line-plot(10.rand xx 40, :background<none>, :stroke-width(2))

In [26]:
# Styling settings for plots and diagrams.
# NOTE(review): most of these are not used in the visible cells; presumably
# they are consumed by plotting cells elsewhere in the notebook — confirm.

# Colors for titles, strokes and tooltips.
my ($title-color, $stroke-color)               = 'Silver', 'SlateGray';
my ($tooltip-color, $tooltip-background-color) = 'LightBlue', 'none';

# Tick label appearance.
my $tick-labels-font-size   = 10;
my $tick-labels-color       = 'Silver';
my $tick-labels-font-family = 'Helvetica';

# Background and color schemes.
my $background    = '#1F1F1F';
my $color-scheme  = 'schemeTableau10';
my $color-palette = 'Inferno';

# Graph drawing sizes.
my ($edge-thickness, $vertex-size) = 3, 6;

# Mermaid init directive: 'forest' theme with ivory-colored lines.
my $mmd-theme = q:to/END/;
%%{
  init: {
    'theme': 'forest',
    'themeVariables': {
      'lineColor': 'Ivory'
    }
  }
}%%
END

# Two alternative force-layout parameter sets.
my %force =
    collision => { iterations => 0, radius => 10 },
    link      => { distance => 180 };
my %force2 =
    charge    => { strength => -30, iterations => 4 },
    collision => { radius => 50, iterations => 4 },
    link      => { distance => 30 };

# Common options bundle; kept as the last statement so it is the cell's displayed value.
my %opts =
    background     => $background,
    title-color    => $title-color,
    edge-thickness => $edge-thickness,
    vertex-size    => $vertex-size;

{background => #1F1F1F, edge-thickness => 3, title-color => Silver, vertex-size => 6}

----

## Ingestion

In [27]:
# Import the mushroom dataset (CSV) straight from its URL, with header handling set to 'auto'.
my $url = 'https://raw.githubusercontent.com/antononcube/MathematicaVsR/refs/heads/master/Data/MathematicaVsR-Data-Mushroom.csv';
my @dsData = data-import($url, :headers<auto>);

# Display the dimensions of the imported dataset.
dimensions(@dsData)

(8124 24)

In [28]:
# Display the deduced type (structure) of the imported dataset.
@dsData.&deduce-type;

Vector(Assoc(Atom((Str)), Atom((Str)), 24), 8124)

---

## SMR

In [29]:
# Build the recommender from the wide-form dataset:
# - items are identified by the "id" column;
# - only the listed columns are used as tag types;
# - tags are named "<tag-type>:<value>" (tag type prefixed, ':' as separator).
# Term weight functions are then applied.
# NOTE(review): the three arguments are presumably the global, local and
# normalizer weight functions, in that order — confirm against the package docs.
my $smrObj =
    ML::SparseMatrixRecommender.new
    .create-from-wide-form(
        @dsData,
        item-column-name              => 'id',
        tag-types                     => [
            'cap-Shape', 'cap-Surface', 'cap-Color', 'bruises?', 'odor',
            'gill-Attachment', 'gill-Spacing', 'gill-Size', 'gill-Color', 'edibility',
        ],
        add-tag-types-to-column-names => True,
        tag-value-separator           => ':',
    )
    .apply-term-weight-functions('IDF', 'None', 'Cosine')

ML::SparseMatrixRecommender(:matrix-dimensions((8124, 51)), :density(<10/51>), :tag-types(("gill-Size", "gill-Spacing", "cap-Shape", "gill-Attachment", "odor", "bruises?", "cap-Surface", "cap-Color", "edibility", "gill-Color")))

In [32]:
# Profile as tag => weight pairs; tags follow the "<tag-type>:<value>" naming.
my %prof =
    'cap-Shape:convex'    => 1.2,
    'cap-Color:gray'      => 1,
    'edibility:poisonous' => 1.4;

# Turn the profile (as a Mix) into a profile vector and adapt its representation.
my $profVec =
    $smrObj
    .to-profile-vector(%prof.Mix)
    .to-adapted

Math::SparseMatrix(:specified-elements(3), :dimensions((51, 1)), :density(<1/17>))

In [34]:
# Top 12 recommendations for the given tag => weight profile;
# take-value yields the pipeline value for display.
$smrObj
.recommend-by-profile(
    { 'cap-Shape:convex' => 1.2, 'cap-Color:gray' => 1, 'edibility:poisonous' => 1.4 },
    12)
.take-value

[1817 => 1 2129 => 1 2179 => 1 2239 => 1 2386 => 1 2444 => 1 2534 => 1 2537 => 1 2540 => 1 2567 => 1 2572 => 1 2595 => 1]

In [35]:
#% html
# Column order for the HTML table: score and id first, then the remaining
# dataset columns in alphabetical order.
my @field-names = 'score', 'id', |@dsData.head.keys.grep(* ne 'id').sort;

# Recommend by profile, join the recommendations with the dataset rows,
# and render the result as an HTML table.
# The join key is stated explicitly, consistent with the other join-across
# call in this notebook.
$smrObj
.recommend-by-profile({"cap-Shape:convex" => 1.2, "cap-Color:gray" => 1, "edibility:poisonous" => 1.4}, 12)
.join-across(@dsData, on => 'id')
.take-value
==> to-html(:@field-names)

score,id,bruises?,cap-Color,cap-Shape,cap-Surface,edibility,gill-Attachment,gill-Color,gill-Size,gill-Spacing,habitat,odor,population,ring-Number,ring-Type,spore-Print-Color,stalk-Color-Above-Ring,stalk-Color-Below-Ring,stalk-Root,stalk-Shape,stalk-Surface-Above-Ring,stalk-Surface-Below-Ring,veil-Color,veil-Type
1,1817,False,gray,convex,fibrous,poisonous,free,chocolate,broad,close,grasses,foul,solitary,one,large,chocolate,brown,buff,bulbous,enlarging,silky,silky,white,partial
1,2129,False,gray,convex,fibrous,poisonous,free,pink,broad,close,grasses,foul,solitary,one,large,chocolate,buff,buff,bulbous,enlarging,silky,silky,white,partial
1,2179,False,gray,convex,fibrous,poisonous,free,pink,broad,close,woods,foul,solitary,one,large,chocolate,brown,pink,bulbous,enlarging,silky,silky,white,partial
1,2239,False,gray,convex,fibrous,poisonous,free,gray,broad,close,paths,foul,solitary,one,large,chocolate,buff,pink,bulbous,enlarging,silky,silky,white,partial
1,2386,False,gray,convex,fibrous,poisonous,free,chocolate,broad,close,woods,foul,solitary,one,large,chocolate,brown,brown,bulbous,enlarging,silky,silky,white,partial
1,2444,False,gray,convex,fibrous,poisonous,free,chocolate,broad,close,grasses,foul,solitary,one,large,chocolate,buff,buff,bulbous,enlarging,silky,silky,white,partial
1,2534,False,gray,convex,fibrous,poisonous,free,gray,broad,close,paths,foul,solitary,one,large,chocolate,brown,buff,bulbous,enlarging,silky,silky,white,partial
1,2537,False,gray,convex,fibrous,poisonous,free,chocolate,broad,close,woods,foul,several,one,large,chocolate,pink,buff,bulbous,enlarging,silky,silky,white,partial
1,2540,False,gray,convex,fibrous,poisonous,free,chocolate,broad,close,woods,foul,solitary,one,large,chocolate,buff,brown,bulbous,enlarging,silky,silky,white,partial
1,2567,False,gray,convex,fibrous,poisonous,free,pink,broad,close,woods,foul,solitary,one,large,chocolate,buff,pink,bulbous,enlarging,silky,silky,white,partial


In [37]:
#% html
# NOTE(review): the cell value is sunk and the tables are echoed as plain text,
# so the html magic above looks unnecessary — confirm.

# Columns shown in the echoed table.
my @field-names = 'score', 'id', 'odor', 'edibility', 'habitat', 'population';

# Recommend 10 items from the history items 2129 and 2540, keeping the history
# items among the results; echo the raw recommendations, then join them with
# the dataset on "id" and echo the joined rows as a text table.
sink $smrObj
.recommend(<2129 2540>, 10, remove-history => False)
.echo-value
.join-across(@dsData, on => 'id')
.echo-value(as => { to-pretty-table($_, :@field-names) });

[1817 => 19 2129 => 19 2179 => 19 2386 => 19 2444 => 19 2537 => 19 2540 => 19 2567 => 19 2572 => 19 2698 => 19]
+-----------+------+------+-----------+---------+------------+
|   score   |  id  | odor | edibility | habitat | population |
+-----------+------+------+-----------+---------+------------+
| 19.000000 | 1817 | foul | poisonous | grasses |  solitary  |
| 19.000000 | 2129 | foul | poisonous | grasses |  solitary  |
| 19.000000 | 2179 | foul | poisonous |  woods  |  solitary  |
| 19.000000 | 2386 | foul | poisonous |  woods  |  solitary  |
| 19.000000 | 2444 | foul | poisonous | grasses |  solitary  |
| 19.000000 | 2537 | foul | poisonous |  woods  |  several   |
| 19.000000 | 2540 | foul | poisonous |  woods  |  solitary  |
| 19.000000 | 2567 | foul | poisonous |  woods  |  solitary  |
| 19.000000 | 2572 | foul | poisonous | grasses |  several   |
| 19.000000 | 2698 | foul | poisonous | grasses |  solitary  |
+-----------+------+------+-----------+---------+------------+


---

## Classification

In [43]:
# Contingency table of odor versus edibility, displayed as a text table.
@dsData.&cross-tabulate('odor', 'edibility')
==> to-pretty-table()

+----------+--------+-----------+
|          | edible | poisonous |
+----------+--------+-----------+
| almond   |  400   |           |
| anise    |  400   |           |
| creosote |        |    192    |
| fishy    |        |    576    |
| foul     |        |    2160   |
| musty    |        |     36    |
| none     |  3408  |    120    |
| pungent  |        |    256    |
| spicy    |        |    576    |
+----------+--------+-----------+