# Movie recommendation system using the LightFM library

In [1]:
import numpy as np
from lightfm.datasets import fetch_movielens
from lightfm import LightFM

In [2]:
#load the MovieLens dataset through LightFM's helper (min_rating is passed as 4.0)
data = fetch_movielens(min_rating=4.0)

#summarise the train and test interaction matrices, one repr per line
print(repr(data['train']), repr(data['test']), sep='\n')

<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 49906 stored elements in COOrdinate format>
<943x1682 sparse matrix of type '<class 'numpy.int32'>'
	with 5469 stored elements in COOrdinate format>


In [3]:
#display the whole dictionary returned by fetch_movielens to see which keys it holds
data

{'item_feature_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'item_features': <1682x1682 sparse matrix of type '<class 'numpy.float32'>'
 	with 1682 stored elements in Compressed Sparse Row format>,
 'item_labels': array(['Toy Story (1995)', 'GoldenEye (1995)', 'Four Rooms (1995)', ...,
        'Sliding Doors (1998)', 'You So Crazy (1994)',
        'Scream of Stone (Schrei aus Stein) (1991)'], dtype=object),
 'test': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
 	with 5469 stored elements in COOrdinate format>,
 'train': <943x1682 sparse matrix of type '<class 'numpy.int32'>'
 	with 49906 stored elements in COOrdinate format>}

In [4]:
#create model with the WARP loss; seed the model's random state so that a
#fresh "Restart & Run All" starts from the same initialisation every time
#NOTE(review): with num_threads > 1 the parallel updates may still cause
#small run-to-run differences - confirm, or use num_threads=1 for exact repeats
model = LightFM(loss='warp', random_state=42)
#train model on the training interactions for 30 epochs
model.fit(data['train'], epochs=30, num_threads=2)

<lightfm.lightfm.LightFM at 0x7f050829fb00>

In [5]:
def sample_recommendation(model, data, user_ids, top_n=3):
    """Print known positives and top-ranked recommendations for each user.

    Parameters
    ----------
    model : object with a ``predict(user_id, item_ids)`` method
        Fitted model returning one score per requested item id
        (a LightFM instance in this notebook).
    data : dict
        Must provide ``'train'`` (sparse user x item interaction matrix)
        and ``'item_labels'`` (array of titles indexed by item id).
    user_ids : iterable of int
        Row indices (in ``data['train']``) of the users to report on.
    top_n : int, optional
        Number of known positives and of recommendations printed per
        user. Defaults to 3, the value that used to be hard-coded.

    Returns
    -------
    None
        The report is written to stdout.
    """
    item_labels = data['item_labels']

    #convert to CSR once, outside the loop, so each user's row can be indexed
    train_csr = data['train'].tocsr()

    #every item id, reused for each predict call
    n_items = train_csr.shape[1]
    all_item_ids = np.arange(n_items)

    #generate recommendations for each user input
    for user_id in user_ids:

        #movies they already like: items with a stored interaction in training
        known_positives = item_labels[train_csr[user_id].indices]

        #movies the model predicts they will like
        scores = model.predict(user_id, all_item_ids)
        #rank in order of most liked to least
        #NOTE: training positives are not filtered out, so a recommendation
        #may repeat one of the user's known positives
        top_items = item_labels[np.argsort(-scores)]

        #print results
        print("User %s" % user_id)
        print("         Known positives:")

        for x in known_positives[:top_n]:
            print("          %s" % x)

        print("         Recommend:")

        for x in top_items[:top_n]:
            print("         %s" % x)

In [6]:
#print known positives and recommendations for a handful of example users
sample_recommendation(model=model, data=data, user_ids=[0, 3, 25, 34, 450])

User 0
         Known positives:
          Toy Story (1995)
          Four Rooms (1995)
          Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)
         Recommend:
         Pulp Fiction (1994)
         Fargo (1996)
         Star Wars (1977)
User 3
         Known positives:
          Seven (Se7en) (1995)
          Contact (1997)
          Starship Troopers (1997)
         Recommend:
         Scream (1996)
         Starship Troopers (1997)
         Chasing Amy (1997)
User 25
         Known positives:
          Dead Man Walking (1995)
          Star Wars (1977)
          Fargo (1996)
         Recommend:
         Fargo (1996)
         Contact (1997)
         Trainspotting (1996)
User 34
         Known positives:
          Air Force One (1997)
          Game, The (1997)
          Saint, The (1997)
         Recommend:
         Air Force One (1997)
         Titanic (1997)
         Saint, The (1997)
User 450
         Known positives:
          Contact (1997)
          George of the Jung