# SMR workflows with large data

Anton Antonov  
RakuForPrediction at WordPress   
October 2025

----

## Setup

In [None]:
use Data::Reshapers;
use Data::Importers;
use Data::Summarizers;

use ML::SparseMatrixRecommender;

----

## Ingestion

In [4]:
# Location of the mushroom dataset (CSV file) used throughout this notebook.
my $url = 'https://raw.githubusercontent.com/antononcube/MathematicaVsR/refs/heads/master/Data/MathematicaVsR-Data-Mushroom.csv';

# Import the CSV data, letting the importer handle the header row ('auto').
my @dsData = data-import($url, :headers<auto>);

# Show the dimensions of the imported dataset.
dimensions(@dsData)

(8124 24)

In [5]:
# Summarize the structure (element types and sizes) of the imported dataset.
@dsData.&deduce-type;

Vector(Assoc(Atom((Str)), Atom((Str)), 24), 8124)

---

## SMR

Create a Sparse Matrix Recommender (SMR) object:

In [6]:
# Build the recommender from the wide-form dataset:
#  - rows are identified by the "id" column;
#  - only the listed columns are used as tag types;
#  - column names are prefixed to the tag values using ":" as separator
#    (this is what produces tags such as "cap-Shape:convex");
#  - finally the "IDF", "None", "Cosine" term-weight functions are applied
#    (NOTE(review): presumably global / local / normalizer weights -- confirm
#    against the ML::SparseMatrixRecommender documentation).
my $smrObj = ML::SparseMatrixRecommender.new(native => True)
    .create-from-wide-form(
        @dsData,
        :item-column-name<id>,
        tag-types => [<cap-Shape cap-Surface cap-Color bruises? odor gill-Attachment gill-Spacing gill-Size gill-Color edibility>],
        add-tag-types-to-column-names => True,
        tag-value-separator => ':')
    .apply-term-weight-functions('IDF', 'None', 'Cosine')

ML::SparseMatrixRecommender(:matrix-dimensions((8124, 51)), :density(<10/51>), :tag-types(("bruises?", "cap-Shape", "odor", "gill-Size", "gill-Color", "cap-Color", "gill-Spacing", "cap-Surface", "edibility", "gill-Attachment")))

**Remark:** The argument `:native` specifies whether native (C-implemented) sparse matrix algebra structures and operations are used.

Recommendation by profile result:

In [7]:
# Ask for the top 12 recommendations for a weighted tag profile,
# with normalization of the scores switched off, and take the pipeline value.
$smrObj
    .recommend-by-profile(
        %('cap-Shape:convex' => 1.2, 'cap-Color:gray' => 1, 'edibility:poisonous' => 1.4),
        12,
        normalize => False)
    .take-value

[3305 => 3.6 3314 => 3.6 3512 => 3.6 3516 => 3.6 3695 => 3.6 3702 => 3.6 3956 => 3.6 3960 => 3.6 3964 => 3.6 4567 => 3.6 4641 => 3.6 4643 => 3.6]

Recommendation by profile with extended result display:

In [8]:
#% html
# Column order for the table: score and id first, then the remaining dataset columns, sorted.
my @field-names = 'score', 'id', |@dsData.head.keys.grep({ $_ ne 'id' }).sort;

# Same profile query as before, joined with the original records and rendered as an HTML table.
my @res = |(
    $smrObj
        .recommend-by-profile(
            %('cap-Shape:convex' => 1.2, 'cap-Color:gray' => 1, 'edibility:poisonous' => 1.4),
            12,
            normalize => False)
        .join-across(@dsData)
        .take-value
) ==> to-html(field-names => @field-names)

score,id,bruises?,cap-Color,cap-Shape,cap-Surface,edibility,gill-Attachment,gill-Color,gill-Size,gill-Spacing,habitat,odor,population,ring-Number,ring-Type,spore-Print-Color,stalk-Color-Above-Ring,stalk-Color-Below-Ring,stalk-Root,stalk-Shape,stalk-Surface-Above-Ring,stalk-Surface-Below-Ring,veil-Color,veil-Type
3.6,3305,False,gray,convex,fibrous,poisonous,free,gray,broad,close,grasses,foul,several,one,large,chocolate,buff,brown,bulbous,enlarging,silky,silky,white,partial
3.6,3314,False,gray,convex,fibrous,poisonous,free,chocolate,broad,close,woods,foul,solitary,one,large,chocolate,pink,pink,bulbous,enlarging,silky,silky,white,partial
3.6,3512,False,gray,convex,scaly,poisonous,free,chocolate,broad,close,woods,foul,several,one,large,chocolate,buff,pink,bulbous,enlarging,silky,silky,white,partial
3.6,3516,False,gray,convex,smooth,poisonous,free,gray,narrow,close,woods,creosote,several,one,pendant,brown,white,white,bulbous,enlarging,smooth,smooth,white,partial
3.6,3695,False,gray,convex,fibrous,poisonous,free,gray,broad,close,grasses,foul,several,one,large,chocolate,pink,buff,bulbous,enlarging,silky,silky,white,partial
3.6,3702,False,gray,convex,scaly,poisonous,free,pink,broad,close,grasses,foul,solitary,one,large,chocolate,buff,buff,bulbous,enlarging,silky,silky,white,partial
3.6,3956,False,gray,convex,scaly,poisonous,free,gray,broad,close,paths,foul,several,one,large,chocolate,buff,pink,bulbous,enlarging,silky,silky,white,partial
3.6,3960,True,gray,convex,smooth,poisonous,free,white,broad,close,urban,foul,several,one,pendant,chocolate,white,white,bulbous,tapering,fibrous,smooth,white,partial
3.6,3964,False,gray,convex,scaly,poisonous,free,chocolate,broad,close,paths,foul,solitary,one,large,chocolate,buff,buff,bulbous,enlarging,silky,silky,white,partial
3.6,4567,False,gray,convex,scaly,poisonous,free,gray,broad,close,paths,foul,solitary,one,large,chocolate,pink,brown,bulbous,enlarging,silky,silky,white,partial


Recommendation by items (or consumption history):

In [21]:
#% html
# Columns to show in the pretty-printed table.
my @field-names = 'score', 'id', 'odor', 'edibility', 'habitat', 'population';

# Top 10 recommendations for the given items; the history items themselves are not removed.
sink $smrObj
    .recommend(<2129 2540>, 10, remove-history => False)
    .echo-value('recommendation result: ')
    .join-across(@dsData, on => 'id')       # Inner join of the result with the original data
    .echo-value(as => { to-pretty-table($_, field-names => @field-names) });       # Echo the pipeline value as a "pretty table" restricted to the chosen columns

recommendation result: [3004 => 19 3162 => 19 3174 => 19 3234 => 19 3236 => 19 3260 => 19 3265 => 19 3607 => 19 3608 => 19 5668 => 19]
+-----------+------+------+-----------+---------+------------+
|   score   |  id  | odor | edibility | habitat | population |
+-----------+------+------+-----------+---------+------------+
| 19.000000 | 3004 | foul | poisonous |  woods  |  solitary  |
| 19.000000 | 3162 | foul | poisonous |  paths  |  several   |
| 19.000000 | 3174 | foul | poisonous |  paths  |  several   |
| 19.000000 | 3234 | foul | poisonous |  paths  |  solitary  |
| 19.000000 | 3236 | foul | poisonous | grasses |  several   |
| 19.000000 | 3260 | foul | poisonous | grasses |  solitary  |
| 19.000000 | 3265 | foul | poisonous |  paths  |  solitary  |
| 19.000000 | 3607 | foul | poisonous | grasses |  several   |
| 19.000000 | 3608 | foul | poisonous |  paths  |  several   |
| 19.000000 | 5668 | foul | poisonous |  paths  |  solitary  |
+-----------+------+------+-----------+---------+------------+

---

## Profiling

Here is a simple profiling loop for recommendations by profile:

In [17]:
# Number of timed repetitions of the same profile query.
my $n = 100;

# Holds the result of the last repetition so it can be shown after timing.
my $res;

# Time $n identical recommend-by-profile calls.
my $tstart = now;
$res = $smrObj
    .recommend-by-profile(
        %('cap-Shape:convex' => 1.2, 'cap-Color:gray' => 1, 'edibility:poisonous' => 1.4),
        12,
        normalize => False)
    .take-value
    for ^$n;
my $tend = now;

# Report the total elapsed time and the average time per repetition.
say "recommendations by profile total time: {$tend - $tstart}, per dot-product: {($tend - $tstart)/$n}";
say 'result : ', $res;

recommendations by profile total time: 2.785626026, per dot-product: 0.02785626026
result : [3305 => 3.6 3314 => 3.6 3512 => 3.6 3516 => 3.6 3695 => 3.6 3702 => 3.6 3956 => 3.6 3960 => 3.6 3964 => 3.6 4567 => 3.6 4641 => 3.6 4643 => 3.6]
