# Group 1 - Data Mining Project: Recommendation Systems
## Evaluating: SVD, BPR, T4Rec



### System level installations

Upgrade our pip environment to ensure we have access to the latest pip packages on pypi.

In [1]:
!python3 -m pip install --upgrade pip

Collecting pip
  Downloading pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-24.3.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m18.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 24.1.2
    Uninstalling pip-24.1.2:
      Successfully uninstalled pip-24.1.2
Successfully installed pip-24.3.1


Install python dependencies

In [2]:
!python3 -m pip install \
datasets \
implicit \
scikit-surprise

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting implicit
  Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl.metadata (6.1 kB)
Collecting scikit-surprise
  Downloading scikit_surprise-1.1.4.tar.gz (154 kB)
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (48

### Project imports

In [42]:
import gzip
import random
import scipy
import pandas as pd

from collections import defaultdict
from datasets import load_dataset
from implicit import bpr, evaluation
from surprise import SVD, Reader, Dataset
from surprise.model_selection import train_test_split

### Data Loading: Load the Amazon reviews dataset from Huggingface

In [6]:
dataset = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_review_Musical_Instruments", trust_remote_code=True)
print(dataset["full"][0])
dataset_items = load_dataset("McAuley-Lab/Amazon-Reviews-2023", "raw_meta_Musical_Instruments", trust_remote_code=True)
print(dataset_items["full"][0])

#Print our splits
print(dataset.keys())
print(len(dataset["full"]))
print(dataset_items.keys())
print(len(dataset_items["full"]))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/19.7k [00:00<?, ?B/s]

Amazon-Reviews-2023.py:   0%|          | 0.00/39.6k [00:00<?, ?B/s]

Musical_Instruments.jsonl:   0%|          | 0.00/1.56G [00:00<?, ?B/s]

Generating full split: 0 examples [00:00, ? examples/s]

{'rating': 5.0, 'title': 'Five Stars', 'text': 'Great headphones, comfortable and sound is good. No complaints. I would buy again.', 'images': [], 'asin': 'B003LPTAYI', 'parent_asin': 'B003LPTAYI', 'user_id': 'AGKASBHYZPGTEPO6LWZPVJWB2BVA', 'timestamp': 1452650586000, 'helpful_vote': 0, 'verified_purchase': True}


meta_Musical_Instruments.jsonl:   0%|          | 0.00/632M [00:00<?, ?B/s]

Generating full split: 0 examples [00:00, ? examples/s]

{'main_category': 'Musical Instruments', 'title': 'Pearl Export Lacquer EXL725S/C249 5-Piece New Fusion Drum Set with Hardware, Honey Amber', 'average_rating': 4.2, 'rating_number': 22, 'features': ['Item may ship in more than one box and may arrive separately', '(22x18, 10x7, 12x8, 16x16, 14x5.5)', 'P930 Demonator Pedal', '830 Hardware Pack', 'Matching snare, REMO snare batter side head'], 'description': ["Introducing the best selling drum set of all time... Export Series returns and this time with a lacquer finish. EXL Export Lacquer Series incorporates Pearl's S.S.T. Superior Shell Technology, Opti-Loc tom mounts, all-new 830 Series Hardware with a P-930 Pedal, and a choice of three amazing stocking finishes."], 'price': 'None', 'images': {'hi_res': ['https://m.media-amazon.com/images/I/91RuLqvx9IL._AC_SL1500_.jpg', 'https://m.media-amazon.com/images/I/81q8vubRs-L._AC_SL1500_.jpg', None, 'https://m.media-amazon.com/images/I/81ubSuvhnrL._AC_SL1500_.jpg', 'https://m.media-amazon.com/i

### Load the dataset into a pandas dataframe

In [33]:
# Dataframe fro training
df = pd.DataFrame(dataset['full'][:len(dataset['full']) // 10]).sample(frac=0.5)
print(df.head())

# Smaller Dataframe for evaluation
df_test = pd.DataFrame(dataset['full'][:len(dataset['full']) // 100]).sample(frac=0.5)  # Example test set
print(df_test.head())

        rating                                         title  \
19889      5.0                      Perfect at a value price   
257046     5.0  As good as an intermediate clarinet can get!   
129594     4.0                                     Juts fine   
204411     4.0                Great power controller /w fuze   
194864     5.0                                  Works great.   

                                                     text images        asin  \
19889   For less than $100, I have a new violin! With ...     []  B08D6Z9B9Q   
257046  Wonderfully crafted wooden clarinet with a rea...     []  B009B8URDW   
129594                       Nothing special but it works     []  B075G16SM9   
204411  Great power controller  /w fuze.<br />Lights g...     []  B0029L7KRY   
194864                    Works great!  Nice, clean look.     []  B0014598WQ   

       parent_asin                       user_id      timestamp  helpful_vote  \
19889   B08D6Z4667  AGJUUWMQPYMCMXTY4CGCIAWMLWMA  165

# SVD Based Recommendation

In [19]:
# Initialize the reader object with a rating scale between 1 and 5
reader = Reader(rating_scale=(1, 5))

# Load the dataframe content observing title, text, and rating
surprise_data = Dataset.load_from_df(df[['title', 'text', 'rating']], reader)

In [22]:
# Initialize the Single Value Decomposition model for collaborative filtering
model = SVD()

#Split the data into training and test sets. Only use 25% of the data for speed.
trainset, testset = train_test_split(surprise_data, test_size=.25)

# Fit the model to the training set
model.fit(trainset)

# Assign predictions to the test set of the trained model
predictions = model.test(testset)

In [24]:
# Print the estimated rating for the prediction at index 0
predictions[0].est

4.351441398448133

In [26]:
# Initialize the sse to 0
sse = 0

# Loop through each prediction
for p in predictions:
    # Calculate the difference between the actual rating and the estimate and increase sse
    sse += (p.r_ui - p.est)**2

# Calculate the mean squared error by dividing the sse with the number of predictions
print(sse / len(predictions))

1.0948113469833445


# Bayesian Personalized Ranking

### Preperation

In [27]:
# Assign our variables with empty dictionaries
userIDs, itemIDs, indexToUser, indexToItem, asinToParentAsin = {}, {}, {}, {}, {}
# Assign to an empty set
parentIDs = set()

# Iterate over the dataframe rows
for idx, row in df.iterrows():
    # Assign our variables to associations in our dataset
    user_id, item_id, parent_item_id = row["user_id"], row["asin"], row["parent_asin"]

    # Check if user_id is already in the dictionary, if not then assign it with a unique index
    if user_id not in userIDs:
        userIDs[user_id] = len(userIDs)
        indexToUser[userIDs[user_id]] = user_id

    # Check item_ids inside the dictionary
    if item_id not in itemIDs:
        # Assign unique index to the item_id
        itemIDs[item_id] = len(itemIDs)
        # Map the index to the item
        indexToItem[itemIDs[item_id]] = item_id
        # Associate the item_id to the parent item id
        asinToParentAsin[item_id] = parent_item_id
        # Add the parent to the set of unique parentIDs
        parentIDs.add(parent_item_id)

# Get the lengths to print the totals
nUsers, nItems, nParents = len(userIDs), len(itemIDs), len(parentIDs)
print(f"There are a total of {nUsers} users and {nParents} products with a total of {nItems} items including all variants.")

There are a total of 80296 users and 47747 products with a total of 56756 items including all variants.


In [30]:
# Initialized after extracting the number of users and items
Xui = scipy.sparse.lil_matrix((nUsers, nItems))

# Iterate over each row in the dataframe
for ifx, row in df.iterrows():
    user_id, item_id = row["user_id"], row["asin"]
    #Only storing positive feedback instances
    Xui[userIDs[user_id],itemIDs[item_id]] = 1

# Convert matrix to a compressed sparse row
Xui_csr = scipy.sparse.csr_matrix(Xui)

In [45]:
# Hyperparameter of latent factors
k = 5

# Initialze the BPR model with the hyperparameters
model = bpr.BayesianPersonalizedRanking(factors = k)

# Fit the BPR model to the compressed sparse row matrix
model.fit(Xui_csr)

print(type(Xui_csr))

  0%|          | 0/100 [00:00<?, ?it/s]

<class 'scipy.sparse._csr.csr_matrix'>


### Training

In [None]:
itemFactors = model.item_factors
userFactors = model.user_factors

recommended = model.recommend(0, Xui_csr[0]) # Top 10 Recommendations for the first user
related = model.similar_items(0) # Top 10 Highly similar to the first item (using cosine similarity)

print(recommended)
print(related)

(array([ 4667,  2453,  7102,    96,  7957,  3597,  8535, 36513, 46795,
       18609]), array([0.3095037 , 0.30811095, 0.2780883 , 0.2758715 , 0.27479672,
       0.27175978, 0.27142596, 0.2667919 , 0.263415  , 0.2622479 ],
      dtype=float32))
(array([    0, 28506, 49749, 39221, 32366, 41117, 39191, 11830, 46217,
       17671]), array([0.9999999 , 0.98548394, 0.985164  , 0.9821205 , 0.9820368 ,
       0.97985405, 0.97758555, 0.97524494, 0.9744209 , 0.97325516],
      dtype=float32))


In [None]:
df_items = pd.DataFrame(dataset_items['full'])


In [None]:
df_items.head()

Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Musical Instruments,Pearl Export Lacquer EXL725S/C249 5-Piece New ...,4.2,22,[Item may ship in more than one box and may ar...,[Introducing the best selling drum set of all ...,,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['Best Selling Drum Set of All Time'...,Pearl,"[Musical Instruments, Drums & Percussion, Drum...","{""Item Weight"": ""33 pounds"", ""Product Dimensio...",B01M4HO6RK,,,
1,Musical Instruments,Behringer EUROPOWER EPQ900 Professional 900 Wa...,4.0,13,[2 x 390 Watts into 4 Ohms; 2 x 245 Watts into...,"[BEHRINGER EUROPOWER EPQ900, Professional 900-...",,"{'hi_res': [None, 'https://m.media-amazon.com/...","{'title': [], 'url': [], 'user_id': []}",Behringer,"[Musical Instruments, Live Sound & Stage, Powe...","{""Item Weight"": ""10.8 pounds"", ""Product Dimens...",B00508JFE4,,,
2,Musical Instruments,Washburn Classical Series Acoustic Electric Cu...,3.6,15,[The Washburn is truly a professional instrume...,[C64SCE CLASSICAL GUITAR The cutaway allows ac...,399.0,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Washburn,"[Musical Instruments, Guitars]","{""Item Weight"": ""5.98 pounds"", ""Product Dimens...",B000S5JGMU,,,
3,Musical Instruments,"VocoPro, plug in, Black, 21.00 x 21.00 x 23.00...",3.5,7,"[Includes one microphone and one receiver, Can...",[VocoPro UHF-18 DIAMOND - N Wireless Microphon...,112.0,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",VocoPro,"[Musical Instruments, Live Sound & Stage, PA S...","{""Item Weight"": ""2.29 pounds"", ""Product Dimens...",B00B2HLWZW,,,
4,Musical Instruments,Shure SM7B Vocal Dynamic Microphone for Broadc...,4.9,9512,[ONE MICROPHONE FOR EVERYTHING - Studio Record...,"[The SM7B dynamic microphone has a smooth, fla...",399.0,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['Shure SM7B Mic Demonstration', 'Sh...",Shure,"[Musical Instruments, Microphones & Accessorie...","{""Item Weight"": ""2.7 pounds"", ""Product Dimensi...",B0B89ZSYS7,,,


In [None]:
recommended_items = []
for recommendedId in recommended[0]:
    parent_asin = asinToParentAsin[indexToItem[recommendedId]]
    row = df_items[df_items["parent_asin"] == parent_asin]
    recommended_items.append(row)

x = pd.concat(recommended_items, ignore_index=True)
print(len(x))
x

10


Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Musical Instruments,D'Addario Guitar Strings - Phosphor Bronze Aco...,4.7,60133,"[CORROSION-RESISTANT, PRECISION WOUND – D’Adda...",[D'Addario was the first to use Phosphor Bronz...,10.99,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['D'Addario Phosphor Bronze Acoustic...,D'Addario,"[Musical Instruments, Instrument Accessories, ...","{""Item Weight"": ""1.4 ounces"", ""Product Dimensi...",B0BTC9YJ2W,,,
1,Musical Instruments,Best Choice Products 38in Beginner All Wood Ac...,4.2,28430,[COMPLETE ALL-IN-ONE GUITAR KIT: No need to bu...,[],49.99,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['Is this guitar good for a beginner...,Best Choice Products,"[Musical Instruments, Guitars, Acoustic Guitar...","{""Item Weight"": ""4 pounds"", ""Product Dimension...",B0BTR12G3V,,,
2,Musical Instruments,Pyle Foldable Tripod Microphone Stand - Univer...,4.1,3339,[Product 1: HEIGHT ADJUSTABLE: Pyle heavy-duty...,[Pyle Foldable Tripod Microphone Stand - Unive...,48.07,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['Great Mic Stand!', 'Durable Stand ...",Pyle,"[Musical Instruments, Microphones & Accessorie...","{""Item Weight"": ""4.54 Pounds"", ""Best Sellers R...",B0846J7THY,,,
3,Musical Instruments,Singing Machine SML385BTBK Karaoke System with...,4.2,952,[Top loading CD Player plays music CDs plus CD...,[Host a dance party with the SML385BTBK Singin...,,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Singing Machine,"[Musical Instruments, Electronic Music, DJ & K...","{""Item Weight"": ""5.28 pounds"", ""Product Dimens...",B06XB4WXHB,,,
4,Musical Instruments,Donner DC-2 Guitar Capo for Electric and Acous...,4.7,9813,[🎵【Strong Rust Resistance & High-Strength Meta...,[Introduction:Donner DC-2 Capo is for really f...,,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['Recommend.', 'Why Is This My Go-To...",Donner,"[Musical Instruments, Instrument Accessories, ...","{""Item Weight"": ""0.07 Kilograms"", ""Product Dim...",B09Q39C7W2,,,
5,Musical Instruments,Mybecca 6 Pack Acoustic Panels Studio Foam Egg...,4.3,2232,[Great for spot treating sound on walls in you...,[],13.99,"{'hi_res': [None, None, None], 'large': ['http...",{'title': ['Simple Studio Walls Anywhere! | DI...,Mybecca,"[Musical Instruments, Studio Recording Equipme...","{""Item Weight"": ""16 ounces"", ""Product Dimensio...",B0823216ZR,,,
6,Musical Instruments,Donner 17 Key Kalimba Thumb Piano Solid Mahoga...,4.7,1786,[🎶【Handmade with Mahogany】Donner 17 key Kalimb...,[],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['Just fantastic! Really!', 'Showcas...",Donner,"[Musical Instruments, Keyboards & MIDI, Folk &...","{""Item Weight"": ""1.23 pounds"", ""Product Dimens...",B08TBC8F1L,,,
7,Musical Instruments,"Seismic Audio - Pair of Dual 15"" PA DJ SPEAKER...",4.0,97,[500 Watts RMS; 1000 Watts Peak; Impedance: 4 ...,"[Pair of Dual 15"" Audio Speakers Model # - SA-...",428.99,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['The sounds is not good!', 'Powerfu...",Seismic Audio,"[Musical Instruments, Live Sound & Stage, Moni...","{""Item Weight"": ""51 Pounds"", ""Item model numbe...",B094DRK3P8,,,
8,Toys & Games,Elegantoss Portable Bubble Blowing Machine Bub...,3.4,16,[Elegantoss Portable Hubble Bubble Blowing Mac...,[],,"{'hi_res': [None, 'https://m.media-amazon.com/...",{'title': ['Maxx Bubbles - 64oz and 128oz - Ma...,Elegantoss,"[Musical Instruments, Live Sound & Stage, Spec...","{""Product Dimensions"": ""8 x 6 x 5 inches"", ""It...",B09GCRLS3P,,,
9,Amazon Home,Russian Tulip Tips Stainless Steel Icing Pipin...,4.6,268,[],[1],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",XGATML,"[Musical Instruments, Keyboards & MIDI, Electr...","{""Product Dimensions"": ""4.4 x 4.4 x 4.3 inches...",B075WXZTKK,,,


In [None]:
related_items = []
for relatedId in related[0]:
    parent_asin = asinToParentAsin[indexToItem[relatedId]]
    row = df_items[df_items["parent_asin"] == parent_asin]
    related_items.append(row)

x = pd.concat(related_items, ignore_index=True)
print(len(x))
x

10


Unnamed: 0,main_category,title,average_rating,rating_number,features,description,price,images,videos,store,categories,details,parent_asin,bought_together,subtitle,author
0,Musical Instruments,Electronic Drum Set Kids Drum Set Upgraded Ele...,4.4,552,[All-In-One Drum Set: Unleash your child's inn...,[],69.99,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['So cute', 'Product didn’t work - o...",DAUSROOB,"[Musical Instruments, Drums & Percussion, Elec...","{""Item Weight"": ""3 Pounds"", ""Package Dimension...",B0BP73M3G2,,,
1,Tools & Home Improvement,Party Lights Stage lights - Spriak 7color 9-Wa...,3.1,83,[],[],,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['So much fun!', 'Don't let the pric...",Spriak,"[Musical Instruments, Live Sound & Stage, Ligh...","{""Brand"": ""Spriak"", ""Part Number"": ""Happy Zone...",B015E23BHC,,,
2,Musical Instruments,15 Inch Kosher Black Rams Horn Polished Shofar...,4.6,173,[Comes with Peer Hastam® Kosher Authentic Shof...,[Black Rams Horn Shofar 15 inch Polished. Easy...,59.95,"{'hi_res': [None, 'https://m.media-amazon.com/...","{'title': ['Shofar Blowing Guide', 'Kudu Horn ...",Peer Hastam,"[Musical Instruments, Wind & Woodwind Instrume...","{""Item Weight"": ""16 ounces"", ""Package Dimensio...",B01A7HPCJO,,,
3,Musical Instruments,Pyle Foldable Tripod Microphone Stand - Univer...,4.1,3339,[Product 1: HEIGHT ADJUSTABLE: Pyle heavy-duty...,[Pyle Foldable Tripod Microphone Stand - Unive...,48.07,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['Great Mic Stand!', 'Durable Stand ...",Pyle,"[Musical Instruments, Microphones & Accessorie...","{""Item Weight"": ""4.54 Pounds"", ""Best Sellers R...",B0846J7THY,,,
4,Musical Instruments,Pyle Universal Compact Microphone Stand - Mic ...,3.9,341,[BOOM ARM EXTENDABLE: The adjustable telescopi...,"[Introducing, Pyle Universal Compact Microphon...",24.0,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': ['love this stand'], 'url': ['https:...",Pyle,"[Musical Instruments, Microphones & Accessorie...","{""Item Weight"": ""1497 Grams"", ""Product Dimensi...",B002PAY3WY,,,
5,Musical Instruments,Cordoba Digital Clip-on Tuner,4.2,21,"[Clip-on Tuner for Guitars, Ukuleles, and Bass...",[Powerful tuning in a small package. Even in t...,26.39,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Cordoba,"[Musical Instruments, Instrument Accessories, ...","{""Item Weight"": ""3.17 ounces"", ""Package Dimens...",B07GX2MRQK,,,
6,Musical Instruments,Allparts Rickenbacker Volume Knobs,3.2,11,[Genuine AllParts Item],[Volume Knobs for Rickenbacker],19.66,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['DaierTek Guitar Pedal Knobs 6.35MM...,Allparts,"[Musical Instruments, Instrument Accessories, ...","{""Item Weight"": ""0.352 ounces"", ""Product Dimen...",B0015GMU2E,,,
7,Musical Instruments,"Taylor 214ce-L 200 Series Acoustic Guitar, Ros...",5.0,1,"[""Left-handed 6-string Grand Auditorium guitar...","[200 Series, Rosewood, Grand Auditorium, Cutaw...",,{'hi_res': ['https://m.media-amazon.com/images...,"{'title': [], 'url': [], 'user_id': []}",Taylor,"[Musical Instruments, Guitars, Acoustic Guitar...","{""Item Weight"": ""4.4 pounds"", ""Product Dimensi...",B007IVURW0,,,
8,Musical Instruments,"Eastar Flute Clarinet Stand, Portable Tripod D...",4.6,611,[✨Well-made Design — Eastar EST-005 Flute & Cl...,[],24.99,{'hi_res': ['https://m.media-amazon.com/images...,{'title': ['Eastar EST-005 Flute / Clarinet St...,Eastar,"[Musical Instruments, Instrument Accessories, ...","{""Item Weight"": ""13.6 ounces"", ""Package Dimens...",B07M6Q3NZ9,,,
9,Musical Instruments,Compete Audio DC36 replacement foam microphone...,4.4,110,[3 high-quality replacement windscreens for Da...,[3 high-quality foam replacement windscreens f...,9.95,"{'hi_res': [None, 'https://m.media-amazon.com/...",{'title': ['REVIEW Zanyzap 20mm Headset & Lape...,Compete Audio,"[Musical Instruments, Microphones & Accessorie...","{""Item Weight"": ""0.176 ounces"", ""Package Dimen...",B016WTM18G,,,
