In [1]:
from fastai2.tabular.all import *
from fastai2.collab import *

In [3]:
# Fetch the MovieLens 100k dataset and keep the folder it lives in.
path = untar_data(URLs.ML_100k)

# u.data: tab-separated, no header row; only the first three columns are kept.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    usecols=[0, 1, 2],
    names=['user', 'movie', 'rating'],
)

# u.item: pipe-separated, latin-1 encoded, no header row; only the first two
# columns (movie id and title) are kept.
movies = pd.read_csv(
    path/'u.item',
    sep='|',
    header=None,
    encoding='latin-1',
    usecols=[0, 1],
    names=['movie', 'title'],
)

In [4]:
# Attach each movie's title to its ratings. No `on=` is given, so pandas joins
# on whatever columns the two frames have in common.
ratings = pd.merge(ratings, movies)

In [5]:
# Build the collaborative-filtering data loaders, using the movie title
# (rather than the numeric movie id) as the item column.
# NOTE(review): no seed is passed here, so the validation split is presumably
# different on every run — confirm whether reproducibility matters.
dls = CollabDataLoaders.from_df(
    ratings,
    item_name='title',
    bs=64,
)

In [6]:
# Collaborative-filtering learner with 50 latent factors; y_range bounds the
# model's predicted ratings.
learn = collab_learner(
    dls,
    n_factors=50,
    y_range=(0, 5.5),
)

In [7]:
# Train for 5 epochs on the one-cycle schedule: peak learning rate 5e-3,
# weight decay 0.1.
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.1)

epoch,train_loss,valid_loss,time
0,0.943764,0.945623,00:08
1,0.850762,0.866062,00:08
2,0.735538,0.828996,00:08
3,0.578858,0.816693,00:08
4,0.469008,0.817968,00:08


### Interpretation

In [10]:
# Number of ratings each title received.
g = ratings.groupby("title")['rating'].count()

# Titles of the 1000 most-rated movies, most-rated first.
top_movies = (
    g.sort_values(ascending=False)
     .head(1000)
     .index
     .values
)
top_movies[:10]

array(['Star Wars (1977)', 'Contact (1997)', 'Fargo (1996)',
       'Return of the Jedi (1983)', 'Liar Liar (1997)',
       'English Patient, The (1996)', 'Scream (1996)', 'Toy Story (1995)',
       'Air Force One (1997)', 'Independence Day (ID4) (1996)'],
      dtype=object)

In [11]:
# Bias term the model learned for each of the top movies; is_item=True asks
# for the item (movie) side rather than the user side.
movie_bias = learn.model.bias(top_movies, is_item=True)
# One bias value per requested title.
movie_bias.size()

torch.Size([1000])

In [16]:
# Average observed rating per title, to set beside the learned bias.
mean_ratings = ratings.groupby("title")['rating'].mean()

# One (bias, title, mean rating) triple per top movie, in top_movies order.
movie_ratings = [
    (bias, title, mean_ratings.loc[title])
    for title, bias in zip(top_movies, movie_bias)
]

In [17]:
def item0(o):
    "Return the first element of `o` (used as a sort key)."
    return o[0]
# Order the triples by their first element (the learned bias), lowest first,
# and show the 15 lowest.
lowest_bias_first = sorted(movie_ratings, key=item0)
lowest_bias_first[:15]

[(tensor(-0.3798),
  'Children of the Corn: The Gathering (1996)',
  1.3157894736842106),
 (tensor(-0.2619), 'Mortal Kombat: Annihilation (1997)', 1.9534883720930232),
 (tensor(-0.2598), 'Bio-Dome (1996)', 1.903225806451613),
 (tensor(-0.2545), "McHale's Navy (1997)", 2.1884057971014492),
 (tensor(-0.2445), 'Showgirls (1995)', 1.9565217391304348),
 (tensor(-0.2393), 'Free Willy 3: The Rescue (1997)', 1.7407407407407407),
 (tensor(-0.2383), 'Leave It to Beaver (1997)', 1.8409090909090908),
 (tensor(-0.2355), 'Crow: City of Angels, The (1996)', 1.9487179487179487),
 (tensor(-0.2269),
  'Lawnmower Man 2: Beyond Cyberspace (1996)',
  1.7142857142857142),
 (tensor(-0.2221), 'Beautician and the Beast, The (1997)', 2.313953488372093),
 (tensor(-0.2202), 'Barb Wire (1996)', 1.9333333333333333),
 (tensor(-0.2080), 'Cable Guy, The (1996)', 2.339622641509434),
 (tensor(-0.2038), 'Beverly Hills Ninja (1997)', 2.3125),
 (tensor(-0.2036), 'Striptease (1996)', 2.2388059701492535),
 (tensor(-0.1995), 