In [1]:
import pandas as pd
import numpy as np
import scipy
from lightfm.data import Dataset
from lightfm import LightFM
#this line for fix an interantions between train and test
LightFM._check_test_train_intersections = lambda x, y, z: True
from lightfm.cross_validation import random_train_test_split
from lightfm.evaluation import precision_at_k, recall_at_k, auc_score

  "LightFM was compiled without OpenMP support. "


In [2]:
df=pd.read_csv('data.csv', index_col=[0])

In [3]:
df.head()

Unnamed: 0,Beers,Brewery,abv,Url,Id,Rater,From,Rating,Id_beer
0,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,1311021,Derptiluderp,from Indiana,1.23,1
1,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,736885,DCH,from New York,3.99,1
2,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,1310518,md3kcn,from Ohio,2.46,1
3,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,1301271,RobertB412,from Pennsylvania,3.92,1
4,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,1309542,BarfyMan362,from Indiana,4.78,1


LightFM model gives implicit feedback, so I only took raters who voted >=3 and assumed that they like it.

In [4]:
df=df[df['Rating']>=3]

In [5]:
df.head()

Unnamed: 0,Beers,Brewery,abv,Url,Id,Rater,From,Rating,Id_beer
1,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,736885,DCH,from New York,3.99,1
3,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,1301271,RobertB412,from Pennsylvania,3.92,1
4,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,1309542,BarfyMan362,from Indiana,4.78,1
7,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,65915,jockstrappy,from Pennsylvania,3.03,1
8,Traditional Lager,Yuengling Brewery,4.5,/beer/profile/182/1351/,188701,philbe311,from Pennsylvania,3.1,1


# Preparing dataset

Before to put data in model, LightFm requires its own dataset. 

In [6]:
#creat data to fit in lightfm model
dataset = Dataset()
dataset.fit((x for x in df.Rater),
            (x for x in df.Beers),
           item_features=(x for x in df.abv),
           user_features=(x for x in df.From))

In [7]:
dataset.mapping()

({'DCH': 0,
  'RobertB412': 1,
  'BarfyMan362': 2,
  'jockstrappy': 3,
  'philbe311': 4,
  'Singlefinpin': 5,
  'CindyFromOmaha': 6,
  'ClinchMtnMan': 7,
  'Skor304': 8,
  'Dingbat88': 9,
  'BobsyerUncle____Bob': 10,
  'Sabtos': 11,
  'jesthu': 12,
  'marcablanc96': 13,
  'Brook82': 14,
  '1009': 15,
  'izraelc': 16,
  'jwp777': 17,
  'realJohnnyHobo': 18,
  'zuker21': 19,
  'MrOH': 20,
  'dingers': 21,
  'Peach63': 22,
  'nicholas2121': 23,
  'shambeano': 24,
  'thesherrybomber': 25,
  'TooManyGlasses': 26,
  'hk117': 27,
  'sjrider': 28,
  'misteil': 29,
  'jjones12': 30,
  'PorterPro125': 31,
  'schoolboy': 32,
  'wmscottsimpsonjr': 33,
  'DypsoBubble': 34,
  'BB1313': 35,
  'Alexc11': 36,
  'Gostlool': 37,
  'crickrun': 38,
  'moodenba': 39,
  'ArchimedesSox': 40,
  'md3kcn': 41,
  'DrBeergood': 42,
  'Mdog': 43,
  'sjdevel': 44,
  'Gaetan0': 45,
  'DaveAhl': 46,
  'Tantonys': 47,
  'PMakowski': 48,
  'SmokySignals': 49,
  'Urtravel': 50,
  'VoxRationis': 51,
  'RonHen': 52,
  'Chr

In [8]:
mapp=dataset.mapping()

In [9]:
beer=list(mapp[3])
raters=list(mapp[1])

In [10]:
(interactions, weight)= dataset.build_interactions([(x[5], x[0]) for x in df.values])

Created item and user feature to put in hybrid model

In [11]:
#make sure to put [] outside of second x to following the lightfm dataset rules
item_features = dataset.build_item_features([(x[0], [x[2]]) for x in df.values])
user_features = dataset.build_user_features([(x[5], [x[6]]) for x in df.values])

In [12]:
dataset.item_features_shape()

(4823, 4972)

In [13]:
#split train set, test set using lightfm library
train, test=random_train_test_split(interactions, test_percentage=.20, random_state=10)

In [14]:
print(repr(train))
print(repr(test))

<8121x4823 sparse matrix of type '<class 'numpy.int32'>'
	with 36284 stored elements in COOrdinate format>
<8121x4823 sparse matrix of type '<class 'numpy.int32'>'
	with 9072 stored elements in COOrdinate format>


# Pure Collaborative Filtering model

In [15]:
pure_model=LightFM(loss='warp')

In [16]:
pure_model.fit(train,epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x7fac6262d810>

In [17]:
pure_model.predict(np.int32([1,90, 90]), np.int32([1,9, 67]))

array([ 1.0408082, -0.981616 , -4.34876  ], dtype=float32)

In [18]:
auc_score(pure_model, train).mean()

0.9746668

In [19]:
auc_score(pure_model, test, train_interactions=train).mean()

0.75442946

# Hybrid model

In [20]:
hybrid_model=LightFM(loss='warp')
hybrid_model.fit(train,user_features=user_features, item_features=item_features, epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x7fac6262f150>

In [21]:
auc_score(hybrid_model, train,user_features=user_features,item_features=item_features).mean()

0.85724354

In [22]:
auc_score(hybrid_model, test, train_interactions=train,user_features=user_features, item_features=item_features).mean()

0.7476198

Compared to Hybrid model, Pure Collaborative Filterring model performed better on both train set and test set. Therefore, I decided to use Pure Collaborative Filtering model to predict beer for rater

In [23]:
user=90
items=np.int32([10,23,56,432, 78, 90,56,796,567])

In [24]:
hybrid_model.predict(user,items)

array([-1.7690974 , -2.3363786 ,  0.11954585, -2.2323089 , -1.401464  ,
       -3.9831336 ,  0.11954585, -2.426235  , -0.8164188 ], dtype=float32)

In [25]:
predictions=hybrid_model.predict(user, items)

In [26]:
beer_rec_order=np.argsort(-predictions)
beer_rec_order

array([2, 6, 8, 4, 0, 3, 1, 7, 5])

# Prediction Funtion

In [27]:
def recomment(model, data, users_id):
    n_users, n_items = train.shape
    for user in users_id:
        rater=raters[user]
        df_rater=df[df['Rater']=='{}'.format(rater)].sort_values(by='Rating', ascending=False)
        top3beer=df_rater['Beers'].values[:3]
        print("    Name of rater: {} ".format(rater))
        print('    - Drank beers: ')
        for b in top3beer:
            print(b)
        print('\n')
        print("   - Top 3 recommend beers for {} : ".format(rater))
        score=model.predict(user, np.arange(n_items))
        sortbeer=np.argsort(score)
        rate_beer=list(zip(sortbeer, np.arange(n_items)))
        for rating in rate_beer:
            if rating[0] < 3:
                bia=rating[1]
                print(beer[bia])
            else:
                pass
        print('\n')
        print('\n')        

In [29]:
recomment(pure_model, train, [0,1,2])

    Name of rater: DCH 
    - Drank beers: 
Shipping Out of Boston
Pilsener
Pinstripe Pils


   - Top 3 recommend beers for DCH : 
Carnivora
Shift Happens No. 02 DDH Lager
Where Is Your God Now?!




    Name of rater: RobertB412 
    - Drank beers: 
Dunkel Lager
Traditional Lager
Oktoberfest


   - Top 3 recommend beers for RobertB412 : 
The Premium Malt's
Death By Powerpoint
Where Is Your God Now?!




    Name of rater: BarfyMan362 
    - Drank beers: 
Traditional Lager


   - Top 3 recommend beers for BarfyMan362 : 
Hallertauer Hopfen-Cuvee
Day Wrecker
Where Is Your God Now?! (Teal Version)




