# CS608 Project 2: Amazon Fashion Recommendation

## Text Modality - CTR model and analysis

### Setup

In [1]:
!pip install --quiet cornac

[K     |████████████████████████████████| 12.4 MB 69 kB/s 
[?25h  Building wheel for powerlaw (setup.py) ... [?25l[?25hdone


In [3]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [4]:
import sys 
import cornac
import numpy as np
import pandas as pd

from cornac.eval_methods import RatioSplit, BaseMethod
from cornac.models import CTR, CDL, HFT
from cornac.data import TextModality
from cornac.data.text import BaseTokenizer
from collections import defaultdict

%tensorflow_version 1.x
import tensorflow as tf

SEED = 42
VERBOSE = True

TensorFlow 1.x selected.


In [16]:
# Read in reviews and metadata

reviews_train = pd.read_csv('/content/gdrive/MyDrive/MITB/CS608/Project 2/train_review.csv')
reviews_test_seen = pd.read_csv('/content/gdrive/MyDrive/MITB/CS608/Project 2/test_seen_review.csv')
meta = pd.read_csv('/content/gdrive/MyDrive/MITB/CS608/Project 2/amazon_meta_full_43k.csv')

In [17]:
reviews_train.head()

Unnamed: 0,overall,reviewTime,reviewerID,asin,style,reviewText,summary,unixReviewTime
0,5.0,"03 22, 2016",A2MOB79WI93FDN,6040972467,"{'Size:': ' X-Large', 'Color:': ' Pink'}",love it,Five Stars,1458604800
1,3.0,"11 19, 2016",A16O44MQ3ZS7CL,6040972467,"{'Size:': ' 4/6', 'Color:': ' Pink'}",This was too big for me in the bust area (and ...,Eh. Lol. Too big for my little size.,1479513600
2,3.0,"09 17, 2016",A14ZLDIG0XD6AL,6040972467,"{'Size:': ' 12/14', 'Color:': ' White and Blue'}","This dress fit fairly well, except that I'm pr...",Nice dress - fairly deep V,1474070400
3,3.0,"08 21, 2016",A339XOY8DU2UWE,6040972467,"{'Size:': ' 12/14', 'Color:': ' White And Red'}","The fit on this is awesome, and I love the des...","The fit on this is awesome, and I love the design",1471737600
4,5.0,"08 19, 2016",A2BLA3KO8HGANN,6040972467,"{'Size:': ' 16', 'Color:': ' White And Red'}",I am very pleased with this dress! I wasn't su...,Perfect Comfortable Summer Dress!,1471564800


In [18]:
meta.head()

Unnamed: 0.1,Unnamed: 0,asin,imageURL,category,gender,title,description,feature,brand,rank,val_img,img_count,hres_img,description_keywords
0,0,6040974753,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Dress...",female,pistachio womens bandeau tropical floral maxi ...,lightweight dress in multiple gorgeous floral ...,Elasticated ribbed bust design - for maximum c...,Pistachio,"1,253,697inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,1,['https://images-na.ssl-images-amazon.com/imag...,"dress, lightweight, gorgeous, fit, ribbed, flo..."
1,1,6040972467,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Dress...",female,pistachio womens sun flower flowing knee lengt...,"gorgeous lightweight cotton dress in red, pink...","100% Cotton,Women's Floral Pistachio Dress.,Be...",Pistachio,"1,131,061inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,1,['https://images-na.ssl-images-amazon.com/imag...,"cotton, dress, warm, breathable, strap, materi..."
2,2,6040596368,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Coats...",female,ossafashion wedding mink faux fur bridal jacke...,bridal quality artificial fur jacket with coll...,"Condition: New With Tag,Available Sizes: XS-US...",OssaFashion,"469,413inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,5,['https://images-na.ssl-images-amazon.com/imag...,"jacket, fur, collar, bridal, lined, quality, f..."
3,3,9789892837,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Men,Clothing,Shorts,...",male,tommy bahama mens survivalist,tommy bahama men's size chart \ndon't just sur...,"Clothing, Shoes & Jewelry,Men,Clothing,Shorts,...",,"717,299inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,8,['https://images-na.ssl-images-amazon.com/imag...,"waistband, shorts, waist, belt, size, pocket, ..."
4,4,B00008L1ST,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Men,Clothing,Shorts,...",male,dickies mens 13 inch loose fit multipocket wor...,8.5 ounce twill 13 inch work short is the dick...,"100% Cotton,Imported,Hook and Eye closure,Hand...",,"1,087,106inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,12,['https://images-na.ssl-images-amazon.com/imag...,"inch, dickies, fabric, twill, cotton, short, o..."


In [9]:
# Select only user_id, item_id and rating columns from reviews dataset

reviews_only_train = reviews_train[['reviewerID', 'asin', 'overall']]
reviews_only_train.columns = ['user_id', 'item_id', 'rating']

reviews_only_test_seen = reviews_test_seen[['reviewerID', 'asin', 'overall']]
reviews_only_test_seen.columns = ['user_id', 'item_id', 'rating']

reviews_only_train.head()

Unnamed: 0,user_id,item_id,rating
0,A2MOB79WI93FDN,6040972467,5.0
1,A16O44MQ3ZS7CL,6040972467,3.0
2,A14ZLDIG0XD6AL,6040972467,3.0
3,A339XOY8DU2UWE,6040972467,3.0
4,A2BLA3KO8HGANN,6040972467,5.0


In [10]:
# Convert review data to Cornac format 

reviews_dataset_train = list(reviews_only_train.to_records(index=False))
reviews_dataset_test_seen = list(reviews_only_test_seen.to_records(index=False))

In [11]:
# Convert metadata to Cornac format

meta_title = list(meta['title'].astype(str))

meta_description = list(meta['description'].astype(str))
meta_description_kw = list(meta['description_keywords'].astype(str))

meta_ids = list(meta['asin'])

In [12]:
review_text = list(reviews_train['reviewText'].astype(str))
review_ids_user = list(reviews_train['reviewerID'])
review_ids_item = list(reviews_train['asin'])

In [13]:
eval_metrics = [
  cornac.metrics.RMSE(),
  cornac.metrics.NCRR(k=20),
  cornac.metrics.NCRR(k=50),
  cornac.metrics.Recall(k=20),
  cornac.metrics.Recall(k=50),
  cornac.metrics.NDCG(k=20),
  cornac.metrics.NDCG(k=50),
]

### Collaborative Topic Regression (CTR)

#### Baseline CTR model

In [15]:
# Baseline CTR model using item description as corpus

ctr = CTR(k=50, max_iter=20, verbose=VERBOSE, seed=SEED)

train_data = reviews_dataset_train
test_data = reviews_dataset_test_seen
docs = meta_description
item_ids = meta_ids

item_text_modality = TextModality(corpus=docs, ids=item_ids, tokenizer=BaseTokenizer(sep=" ", stop_words="english"), max_vocab=8000, max_doc_freq=0.8,)

eval_method = BaseMethod.from_splits(train_data=train_data, test_data=test_data, exclude_unknowns=True, item_text=item_text_modality, verbose=VERBOSE, seed=SEED)

cornac.Experiment(eval_method=eval_method, models=[ctr], metrics=eval_metrics).run()

rating_threshold = 1.0
exclude_unknowns = True




---
Training data:
Number of users = 505047
Number of items = 42362
Number of ratings = 1053169
Max rating = 5.0
Min rating = 1.0
Global mean = 4.1




---
Test data:
Number of users = 168775
Number of items = 27914
Number of ratings = 191113
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 505047
Total items = 42362

[CTR] Training started!


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Learning completed!

[CTR] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=191113.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=168775.0, style=ProgressStyle(description_w…



TEST:
...
    |   RMSE | NCRR@20 | NCRR@50 | NDCG@20 | NDCG@50 | Recall@20 | Recall@50 | Train (s) |  Test (s)
--- + ------ + ------- + ------- + ------- + ------- + --------- + --------- + --------- + ---------
CTR | 2.5391 |  0.1281 |  0.1296 |  0.1618 |  0.1710 |    0.2698 |    0.3147 | 1626.8714 | 1375.2889



#### Results exploration

In [18]:
vocab = ctr.train_set.item_text.vocab
topic_word_dist = ctr.model.beta.T[:, -ctr.train_set.item_text.max_vocab:] 
top_word_inds = np.argsort(topic_word_dist, axis=1) + 4 

topic_words = {}
topic_df = defaultdict(list)
print("WORD TOPICS:")
for t in range(len(topic_word_dist)):
  top_words = vocab.to_text(top_word_inds[t][-10:][::-1], sep=", ")
  topic_words[t+1] = top_words
  topic_df["Topic"].append(t + 1)
  topic_df["Top words"].append(top_words)
topic_df = pd.DataFrame(topic_df)
topic_df

WORD TOPICS:


Unnamed: 0,Topic,Top words
0,1,"casual, chest, look, offers, great, color, col..."
1,2,"colors, best, tie, styles, season, fabric, siz..."
2,3,"s, fit, style, inch, sleeveless, features, pro..."
3,4,"cm, l, sizes, accessories, tee, inches, measur..."
4,5,"item, sleeve, products, hand, different, long,..."
5,6,"free, vest, , asian, great, feel, measurements..."
6,7,"just, piece, note, women, lining, relaxed, kg,..."
7,8,"cm, dress, cotton, waist, comfortable, quality..."
8,9,"dry, length, jeans, fit, included, vintage, co..."
9,10,"skirt, l, thigh, perfect, performance, materia..."


First user

In [41]:
user = "A2522ZTCKCJZQT"

user_map = dict(ctr.train_set.uid_map.items())
UIDX = user_map[user]

item_id2idx = ctr.train_set.iid_map
item_idx2id = list(ctr.train_set.item_ids)

print(f"USER {UIDX} TOP-3 TOPICS:")
topic_df.loc[np.argsort(ctr.U[UIDX])[-3:][::-1]]

USER 226312 TOP-3 TOPICS:


Unnamed: 0,Topic,Top words
4,5,"item, sleeve, products, hand, different, long,..."
0,1,"casual, chest, look, offers, great, color, col..."
40,41,"hip, tag, large, total, pictures, short, durab..."


In [42]:
TOPK = 10

recommendations, scores = ctr.rank(UIDX)
recs = []
for i in recommendations[:TOPK]:
  recs.append(item_idx2id[i])

meta[meta["asin"].isin(recs)]

Unnamed: 0,asin,imageURL,category,gender,title,description,feature,brand,rank,val_img,img_count,hres_img,description_keywords
11736,B00HWFCL8U,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Activ...",female,champion womens jersey short,champion jersey short teams up with champion j...,"100% Cotton; Oxford Grey: 90% Cotton, 10% Poly...",,"11,153inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,1,['https://images-na.ssl-images-amazon.com/imag...,"jersey, teams, champion, ready, jackets, short..."
26831,B010WJVMWS,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing",female,urban coco womens elastic waist pleated short ...,size chart s: strap:14.5inch--waist:12.6inch-...,"92.9%polyester+7.1%spandex,Pls note that all t...",Urban CoCo,"7,604inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,14,['https://images-na.ssl-images-amazon.com/imag...,"waist, length, chart, skirt, strap, size, 5inc..."
28742,B0146F9OF6,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Tops,...",female,shein womens summer short sleeve loose casual ...,,"Material: 95% Rayon, 5% Polyester,The fabric i...",,"779inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,29,['https://images-na.ssl-images-amazon.com/imag...,
30747,B0166BU7T0,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Short...",female,90 degree by reflex soft and comfy activewear ...,,The Soft and Comfy Activewear Lounge Shorts wi...,,"15,225inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,4,['https://images-na.ssl-images-amazon.com/imag...,
31923,B017JTNSWG,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Swims...",female,zeraca womens strap side bottom halter racerba...,,"Main Fabric: 86%polyester/14%elastane , Lining...",zeraca,"1,880inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,31,['https://images-na.ssl-images-amazon.com/imag...,
35799,B01BMYTAR6,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Short...",female,wax womens juniors midrise denim shorts,the perfect fit mid rise denim shorts for any ...,"High Quality: 75% Cotton, 23% Polyester, 2% Sp...",Wax,"13,879inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,8,['https://images-na.ssl-images-amazon.com/imag...,"denim, shorts, pockets, wardrobe, stretch, zip..."
36127,B01BZF9PEU,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Short...",female,tobeinstyle womens lace shorts,these stretch shorts are perfect when you are ...,"Pull On closure,Styles: Crotchet, Diamond, Sca...",ToBeInStyle,"27,611inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,43,['https://images-na.ssl-images-amazon.com/imag...,"shorts, stretch, wear, comfortable, stylish, l..."
36375,B01C7UVU3Q,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Sweat...",female,biadani women classic open front lightweight s...,lightweight women flyaway cardigan that is ver...,95% RAYON 5% SPANDEX ; 93% RAYON 7% SPANDEX ; ...,BIADANI,"1,659inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,44,['https://images-na.ssl-images-amazon.com/imag...,"lightweight, cardigan, casual, women, flyaway,..."
40946,B01FQS7RSS,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Dress...",female,milumia womens button up split floral print fl...,,"V-Neck, Half Sleeve/ Short Sleeve,Floral Print...",,"1,538inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,37,['https://images-na.ssl-images-amazon.com/imag...,
40951,B01FQT1DJG,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Dresses",female,milumia womens button up split floral print fl...,,"V-Neck, Half Sleeve/ Short Sleeve,Floral Print...",,"2,105inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,42,['https://images-na.ssl-images-amazon.com/imag...,


Second user

In [43]:
user = "A2PACJHFKI74Y3"

user_map = dict(ctr.train_set.uid_map.items())
UIDX = user_map[user]

item_id2idx = ctr.train_set.iid_map
item_idx2id = list(ctr.train_set.item_ids)

print(f"USER {UIDX} TOP-3 TOPICS:")
topic_df.loc[np.argsort(ctr.U[UIDX])[-3:][::-1]]

USER 205868 TOP-3 TOPICS:


Unnamed: 0,Topic,Top words
26,27,"stretch, polyester, xs, use, suitable, designs..."
12,13,"xl, knit, pocket, offer, w, iron, different, k..."
5,6,"free, vest, , asian, great, feel, measurements..."


In [44]:
TOPK = 10

recommendations, scores = ctr.rank(UIDX)
recs = []
for i in recommendations[:TOPK]:
  recs.append(item_idx2id[i])

meta[meta["asin"].isin(recs)]

Unnamed: 0,asin,imageURL,category,gender,title,description,feature,brand,rank,val_img,img_count,hres_img,description_keywords
5207,B008FT5LH0,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing",female,prana womens halle pant,"durable, moisture managing, water resistant, a...","97% Nylon, 3% Spandex,Imported,Stretch Zion fa...",prAna,657inSportsOutdoors(,['https://images-na.ssl-images-amazon.com/imag...,9,['https://images-na.ssl-images-amazon.com/imag...,"halle, prepared, pant, durable, mountain, anyt..."
14379,B00KIZF1VU,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Skirt...",female,mbj womens basic versatile stretchy flared ska...,,"90% POLYESTER 10% SPANDEX,Made in U.S.A.,***BE...",,"1,460inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,49,['https://images-na.ssl-images-amazon.com/imag...,
15112,B00L57P5XO,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Skirt...",female,allegra k womens pleated aline elastic waist b...,,"95% Polyester, 5% Spandex,A Line Suspender Ski...",Allegra K,"95,813inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,6,['https://images-na.ssl-images-amazon.com/imag...,
21658,B00TIC58T4,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Skirts",female,bluetime womens pleated short braces skirt fba,,"Material: Polyester. Stretch and Comfortable,A...",BLUETIME,"16,687inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,9,['https://images-na.ssl-images-amazon.com/imag...,
22990,B00VGHF9KW,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Dresses",female,allegra k womens contrast doll collar short sl...,,"Blue/Gray:95%Poly,5%Spx (Velvet);Other Colors:...",Allegra K,"44,293inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,11,['https://images-na.ssl-images-amazon.com/imag...,
24422,B00XC09O52,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Men,Clothing,Jeans",male,levis mens 541 athletic taper fit jean,perfect fit for athletic builds. relaxed feel ...,"65% Cotton, 33% Polyester, 2% Elastane,Importe...",,"97inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,10,['https://images-na.ssl-images-amazon.com/imag...,"comfort, fit, relaxed, tailored, feel, mobilit..."
26831,B010WJVMWS,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing",female,urban coco womens elastic waist pleated short ...,size chart s: strap:14.5inch--waist:12.6inch-...,"92.9%polyester+7.1%spandex,Pls note that all t...",Urban CoCo,"7,604inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,14,['https://images-na.ssl-images-amazon.com/imag...,"waist, length, chart, skirt, strap, size, 5inc..."
27411,B0123M9EHM,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Leggings",female,carnival womens fulllength printed soft microf...,carnival women's full-length printed soft micr...,"95% Polyester, 5% Spandex,Imported,Hand Wash,S...",Carnival,"19,837inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,12,['https://images-na.ssl-images-amazon.com/imag...,"microfiber, legging, soft, printed, women, len..."
28293,B013NGSC1G,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Leggings",female,dikoaina women girls sexy solid color bandage ...,fashion women's leggings black color. the stri...,Sexy Stylish See-through Spliced milk silk Roc...,Dikoaina,"14,593inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,6,['https://images-na.ssl-images-amazon.com/imag...,"leggings, fashion, women, strips, cloth, black..."
30584,B015ZK64AO,['https://images-na.ssl-images-amazon.com/imag...,"Clothing, Shoes & Jewelry,Women,Clothing,Coats...",female,artfasion womens slim tailoring faux leather p...,"black-98 size : xs ---- bust:84cm/33""---waist:...",Faux leather jacket:100% Polyurethane (shell) ...,Artfasion,"34,272inClothing,ShoesJewelry(",['https://images-na.ssl-images-amazon.com/imag...,7,['https://images-na.ssl-images-amazon.com/imag...,"39cm, waist, 41cm, 38cm, 42cm, 37cm, size, 59c..."


### Collaborative Deep Learning (CDL)

In [12]:
cdl = CDL(k=50, autoencoder_structure=[100], max_iter=10, vocab_size=8000, seed=SEED, verbose=VERBOSE)

train_data = reviews_dataset_train
test_data = reviews_dataset_test_seen
docs = meta_description
item_ids = meta_ids

item_text_modality = TextModality(corpus=docs, ids=item_ids, tokenizer=BaseTokenizer(sep=" ", stop_words="english"), max_vocab=8000, max_doc_freq=0.8)

eval_method = BaseMethod.from_splits(train_data=train_data, test_data=test_data, exclude_unknowns=True, item_text=item_text_modality, verbose=VERBOSE, seed=SEED)

cornac.Experiment(eval_method=eval_method, models=[cdl], metrics=eval_metrics).run()          

rating_threshold = 1.0
exclude_unknowns = True




---
Training data:
Number of users = 505047
Number of items = 42362
Number of ratings = 1053169
Max rating = 5.0
Min rating = 1.0
Global mean = 4.1




---
Test data:
Number of users = 168775
Number of items = 27914
Number of ratings = 191113
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 505047
Total items = 42362

[CDL] Training started!


HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))


Learning completed!

[CDL] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=191113.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=168775.0, style=ProgressStyle(description_w…



TEST:
...
    |   RMSE | NCRR@20 | NCRR@50 | NDCG@20 | NDCG@50 | Recall@20 | Recall@50 | Train (s) |  Test (s)
--- + ------ + ------- + ------- + ------- + ------- + --------- + --------- + --------- + ---------
CDL | 3.1537 |  0.1109 |  0.1121 |  0.1392 |  0.1467 |    0.2288 |    0.2653 | 4756.1856 | 1641.4965



#### Hidden Factors as Topics (HFT)

In [15]:
hft = cornac.models.HFT(k=20, max_iter=20, grad_iter=20, l2_reg=0.001, lambda_text=0.01, vocab_size=8000, seed=SEED, verbose=VERBOSE)

train_data = reviews_dataset_train
test_data = reviews_dataset_test_seen
docs = review_text
item_ids = review_ids_item

item_text_modality = TextModality(corpus=docs, ids=item_ids, tokenizer=BaseTokenizer(sep=" ", stop_words="english"), max_vocab=8000, max_doc_freq=0.8,)

eval_method = BaseMethod.from_splits(train_data=train_data, test_data=test_data, exclude_unknowns=True, item_text=item_text_modality, verbose=VERBOSE, seed=SEED)

cornac.Experiment(eval_method=eval_method, models=[hft], metrics=eval_metrics).run()          

rating_threshold = 1.0
exclude_unknowns = True




---
Training data:
Number of users = 505047
Number of items = 42362
Number of ratings = 1053169
Max rating = 5.0
Min rating = 1.0
Global mean = 4.1




---
Test data:
Number of users = 168775
Number of items = 27914
Number of ratings = 191113
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 505047
Total items = 42362

[HFT] Training started!


HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Learning completed!

[HFT] Evaluation started!


HBox(children=(FloatProgress(value=0.0, description='Rating', max=191113.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Ranking', max=168775.0, style=ProgressStyle(description_w…



TEST:
...
    |   RMSE | NCRR@20 | NCRR@50 | NDCG@20 | NDCG@50 | Recall@20 | Recall@50 |  Train (s) |  Test (s)
--- + ------ + ------- + ------- + ------- + ------- + --------- + --------- + ---------- + ---------
HFT | 0.7122 |  0.0014 |  0.0015 |  0.0022 |  0.0029 |    0.0051 |    0.0084 | 12362.1164 | 1783.2447

