<a href="https://colab.research.google.com/github/abdu1z1z/RecommendSystem/blob/main/BldModel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Load the movies and ratings tables.
# DATA_DIR is the single place to change when the CSV files live somewhere
# other than the default Colab working directory.
from pathlib import Path

import pandas as pd

DATA_DIR = Path('/content')
movies_df = pd.read_csv(DATA_DIR / 'movies.csv')
ratings_df = pd.read_csv(DATA_DIR / 'ratings.csv')

In [None]:
# Report the shape of both tables (one line per table).
print(
    f"The dimensions of movies dataframe are: {movies_df.shape} \n"
    f"The dimensions of ratings dataframe are: {ratings_df.shape}"
)


The dimensions of movies dataframe are: (9742, 3) 
The dimensions of ratings dataframe are: (100836, 4)


In [None]:
# Preview the first rows of the movies table (movieId, title, genres).
movies_df.head()


Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Preview the first rows of the ratings table (userId, movieId, rating, timestamp).
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# Lookup table movieId -> title, used later to print human-readable names.
movie_names = movies_df.set_index('movieId')['title'].to_dict()

# Dimensions of the (users x movies) rating matrix. Only users/movies that
# actually appear in ratings_df are counted.
n_users = len(ratings_df.userId.unique())
n_items = len(ratings_df.movieId.unique())
n_cells = n_users * n_items
fill_percent = len(ratings_df) / n_cells * 100

print("Number of unique users:", n_users)
print("Number of unique movies:", n_items)
print("The full rating matrix will have:", n_cells, 'elements.')
print('----------')
print("Number of ratings:", len(ratings_df))
print("Therefore: ", fill_percent, '% of the matrix is filled.')
print("We have an incredibly sparse matrix to work with here.")
# The matrix has n_users * n_items cells, so its size grows multiplicatively
# (not "by n*2") as both dimensions grow.
print("And... as you can imagine, as the number of users and products grow, the number of elements will grow as n_users * n_items (roughly quadratically)")
print("You are going to need a lot of memory to work with global scale... storing a full matrix in memory would be a challenge.")
print("One advantage here is that matrix factorization can realize the rating matrix implicitly, thus we don't need all the data")

Number of unique users: 610
Number of unique movies: 9724
The full rating matrix will have: 5931640 elements.
----------
Number of ratings: 100836
Therefore:  1.6999683055613624 % of the matrix is filled.
We have an incredibly sparse matrix to work with here.
And... as you can imagine, as the number of users and products grow, the number of elements will increase by n*2
You are going to need a lot of memory to work with global scale... storing a full matrix in memory would be a challenge.
One advantage here is that matrix factorization can realize the rating matrix implicitly, thus we don't need all the data


In [None]:

import torch
import numpy as np
from torch.autograd import Variable
from tqdm import tqdm_notebook as tqdm

class MatrixFactorization(torch.nn.Module):
    """Plain matrix-factorization model: one embedding per user and per item.

    The score for a (user, item) pair is the dot product of the two
    embedding vectors.

    Args:
        n_users: number of rows in the user embedding table.
        n_items: number of rows in the item embedding table.
        n_factors: length of every embedding vector (latent dimension).
    """

    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        # Embedding tables act as lookup tables: index -> latent vector.
        self.user_factors = torch.nn.Embedding(n_users, n_factors)
        self.item_factors = torch.nn.Embedding(n_items, n_factors)
        # Start from small positive values drawn uniformly from [0, 0.05).
        torch.nn.init.uniform_(self.user_factors.weight, 0, 0.05)
        torch.nn.init.uniform_(self.item_factors.weight, 0, 0.05)

    def forward(self, data):
        """Score a batch of (user index, item index) pairs.

        Args:
            data: integer tensor whose column 0 holds user indices and
                column 1 holds item indices.

        Returns:
            1-D tensor with one dot-product score per row of ``data``.
        """
        users, items = data[:, 0], data[:, 1]
        return (self.user_factors(users) * self.item_factors(items)).sum(1)

    def predict(self, user, item):
        """Score user/item indices given separately.

        ``forward`` takes a single two-column tensor, so the indices are
        stacked into that layout first (the previous implementation passed
        two positional arguments to ``forward`` and always raised TypeError).

        Args:
            user: user index or indices (int, sequence or tensor).
            item: item index or indices, same length as ``user``.

        Returns:
            1-D tensor of scores, one per (user, item) pair.
        """
        device = self.user_factors.weight.device
        users = torch.as_tensor(user, dtype=torch.long, device=device).reshape(-1)
        items = torch.as_tensor(item, dtype=torch.long, device=device).reshape(-1)
        return self.forward(torch.stack((users, items), dim=1))

In [None]:
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader # package that helps transform your data to machine learning readiness

# Note: This isn't 'good' practice, in a MLops sense but we'll roll with this since the data is already loaded in memory.
class Loader(Dataset):
    """Torch dataset of (user index, movie index) -> rating samples.

    Raw userId / movieId values are re-mapped to contiguous 0-based indices
    (in order of first appearance) so they can index embedding tables.

    Args:
        ratings: DataFrame with ``userId``, ``movieId`` and ``rating``
            columns. Defaults to the notebook-level ``ratings_df`` so that
            ``Loader()`` keeps working as before.

    Attributes set on the instance:
        userid2idx / movieid2idx: raw id -> contiguous index.
        idx2userid / idx2movieid: contiguous index -> raw id.
        ratings: copy of the input frame with ids replaced by indices.
        x: integer tensor with columns [user index, movie index].
        y: tensor of ratings, aligned with ``x``.
    """

    def __init__(self, ratings=None):
        if ratings is None:
            ratings = ratings_df

        # Unique raw ids in order of first appearance.
        users = ratings.userId.unique()
        movies = ratings.movieId.unique()

        # raw id -> contiguous index
        self.userid2idx = {o: i for i, o in enumerate(users)}
        self.movieid2idx = {o: i for i, o in enumerate(movies)}

        # contiguous index -> raw id
        self.idx2userid = {i: o for o, i in self.userid2idx.items()}
        self.idx2movieid = {i: o for o, i in self.movieid2idx.items()}

        # assign() returns a new frame, so the caller's frame is not modified.
        self.ratings = ratings.assign(
            userId=ratings.userId.map(self.userid2idx),
            movieId=ratings.movieId.map(self.movieid2idx),
        )

        # Select the feature columns by name: the model reads column 0 as the
        # user and column 1 as the movie, so the order must not depend on
        # which other columns the frame happens to contain.
        self.x = torch.tensor(self.ratings[['userId', 'movieId']].to_numpy())
        self.y = torch.tensor(self.ratings['rating'].to_numpy())

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.ratings)

In [None]:
# --- Training configuration ---
num_epochs = 128
cuda = torch.cuda.is_available()

print("Is running on GPU:", cuda)

# Seed PyTorch's global RNG so the random embedding initialisation and the
# DataLoader shuffling are repeatable across Restart & Run All.
torch.manual_seed(0)

model = MatrixFactorization(n_users, n_items, n_factors=8)
print(model)
# Show the initial (untrained) value of every trainable parameter.
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)
# GPU enable if you have a GPU...
if cuda:
    model = model.cuda()

# MSE loss between predicted and observed ratings
loss_fn = torch.nn.MSELoss()

# Adam optimizer over both embedding tables
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train data: mini-batches of 128 samples, reshuffled every epoch
train_set = Loader()
train_loader = DataLoader(train_set, 128, shuffle=True)



Is running on GPU: False
MatrixFactorization(
  (user_factors): Embedding(610, 8)
  (item_factors): Embedding(9724, 8)
)
user_factors.weight tensor([[3.5413e-02, 9.9210e-03, 3.0095e-02,  ..., 4.8932e-02, 4.0189e-03,
         3.5180e-02],
        [4.4238e-02, 3.5092e-02, 2.7674e-02,  ..., 4.6299e-02, 3.9402e-02,
         2.1629e-02],
        [2.0775e-02, 8.7451e-03, 2.3533e-02,  ..., 3.1560e-03, 2.1126e-02,
         1.7577e-02],
        ...,
        [5.2140e-03, 4.6269e-02, 3.6920e-02,  ..., 6.5526e-03, 1.4704e-02,
         4.3300e-02],
        [3.9666e-02, 3.6085e-02, 3.8065e-02,  ..., 2.5180e-02, 3.2003e-02,
         4.8861e-02],
        [4.2645e-02, 1.4168e-02, 2.6809e-02,  ..., 3.8679e-02, 6.7964e-05,
         2.5795e-02]])
item_factors.weight tensor([[0.0357, 0.0010, 0.0257,  ..., 0.0498, 0.0083, 0.0344],
        [0.0488, 0.0104, 0.0042,  ..., 0.0468, 0.0448, 0.0352],
        [0.0484, 0.0400, 0.0249,  ..., 0.0159, 0.0272, 0.0421],
        ...,
        [0.0385, 0.0192, 0.0418,  ...,

In [None]:
# Train for num_epochs passes over the ratings and report the mean batch loss.
for it in tqdm(range(num_epochs)):
    losses = []
    for x, y in train_loader:
        # Only the device transfer depends on CUDA. The optimisation step
        # below must run on CPU as well; when it was nested under this `if`,
        # nothing trained on CPU and `losses` stayed empty (ZeroDivisionError).
        if cuda:
            x, y = x.cuda(), y.cuda()
        optimizer.zero_grad()
        outputs = model(x)
        # Ratings are cast to float32 to match the model output dtype.
        loss = loss_fn(outputs.squeeze(), y.type(torch.float32))
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
    print("iter #{}".format(it), "Loss:", sum(losses) / len(losses))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for it in tqdm(range(num_epochs)):


  0%|          | 0/128 [00:00<?, ?it/s]

ZeroDivisionError: division by zero

In [None]:

# By training the model, we will have tuned latent factors for movies and users.
# Print every trainable parameter and keep the tensors keyed by parameter name,
# so the user/item weights are picked by name rather than by iteration order.
trained_weights = {}
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)
        trained_weights[name] = param.data

# Fall back to 0 (the previous placeholder value) if a table is not trainable.
uw = trained_weights.get('user_factors.weight', 0)
iw = trained_weights.get('item_factors.weight', 0)

user_factors.weight tensor([[3.5413e-02, 9.9210e-03, 3.0095e-02,  ..., 4.8932e-02, 4.0189e-03,
         3.5180e-02],
        [4.4238e-02, 3.5092e-02, 2.7674e-02,  ..., 4.6299e-02, 3.9402e-02,
         2.1629e-02],
        [2.0775e-02, 8.7451e-03, 2.3533e-02,  ..., 3.1560e-03, 2.1126e-02,
         1.7577e-02],
        ...,
        [5.2140e-03, 4.6269e-02, 3.6920e-02,  ..., 6.5526e-03, 1.4704e-02,
         4.3300e-02],
        [3.9666e-02, 3.6085e-02, 3.8065e-02,  ..., 2.5180e-02, 3.2003e-02,
         4.8861e-02],
        [4.2645e-02, 1.4168e-02, 2.6809e-02,  ..., 3.8679e-02, 6.7964e-05,
         2.5795e-02]])
item_factors.weight tensor([[0.0357, 0.0010, 0.0257,  ..., 0.0498, 0.0083, 0.0344],
        [0.0488, 0.0104, 0.0042,  ..., 0.0468, 0.0448, 0.0352],
        [0.0484, 0.0400, 0.0249,  ..., 0.0159, 0.0272, 0.0421],
        ...,
        [0.0385, 0.0192, 0.0418,  ..., 0.0480, 0.0232, 0.0055],
        [0.0389, 0.0053, 0.0041,  ..., 0.0182, 0.0332, 0.0431],
        [0.0435, 0.0040, 0.0467

In [None]:
# Copy the item (movie) embedding weights to the CPU and expose them as a NumPy array.
trained_movie_embeddings = model.item_factors.weight.data.cpu().numpy()


In [None]:

len(trained_movie_embeddings) # number of embedding rows, i.e. one factor vector per unique movie

9724

In [None]:

from sklearn.cluster import KMeans
# Group the learned movie embeddings into 10 clusters with k-means.
# random_state is pinned so the centroid initialisation is seeded.
kmeans = KMeans(n_clusters=10, random_state=0)
kmeans.fit(trained_movie_embeddings)

In [None]:
# It can be seen here that the movies that are in the same cluster tend to have
# similar genres. Also note that the algorithm is unfamiliar with the movie name
# and only obtained the relationships by looking at the numbers representing how
# users have responded to the movie selections.

# Count the ratings per movie once, instead of re-filtering ratings_df for
# every movie (this also avoids the deprecated positional `.count()[0]`).
rating_counts = ratings_df['movieId'].value_counts()

# For each cluster, list its 10 most-rated movies.
for cluster in range(kmeans.n_clusters):
    print("Cluster #{}".format(cluster))
    movs = []
    for movidx in np.where(kmeans.labels_ == cluster)[0]:
        # Embedding row index -> original movieId -> (title, number of ratings)
        movid = train_set.idx2movieid[movidx]
        movs.append((movie_names[movid], rating_counts.loc[movid]))
    for mov in sorted(movs, key=lambda tup: tup[1], reverse=True)[:10]:
        print("\t", mov[0])

Cluster #0


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
	 Good Will Hunting (1997)
	 Babe (1995)
	 Truman Show, The (1998)
	 Ghostbusters (a.k.a. Ghost Busters) (1984)
	 Kill Bill: Vol. 2 (2004)
	 WALL·E (2008)
	 V for Vendetta (2006)
	 Who Framed Roger Rabbit? (1988)
	 Snatch (2000)
Cluster #1


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Schindler's List (1993)
	 Independence Day (a.k.a. ID4) (1996)
	 Godfather, The (1972)
	 Lord of the Rings: The Two Towers, The (2002)
	 Lion King, The (1994)
	 Dark Knight, The (2008)
	 Die Hard (1988)
	 Indiana Jones and the Last Crusade (1989)
	 X-Men (2000)
	 One Flew Over the Cuckoo's Nest (1975)
Cluster #2


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Star Wars: Episode IV - A New Hope (1977)
	 Fight Club (1999)
	 Batman (1989)
	 True Lies (1994)
	 Back to the Future (1985)
	 Ace Ventura: Pet Detective (1994)
	 Mask, The (1994)
	 Alien (1979)
	 Léon: The Professional (a.k.a. The Professional) (Léon) (1994)
	 Eternal Sunshine of the Spotless Mind (2004)
Cluster #3


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Terminator 2: Judgment Day (1991)
	 Aladdin (1992)
	 Speed (1994)
	 Pretty Woman (1990)
	 GoldenEye (1995)
	 Beautiful Mind, A (2001)
	 Jumanji (1995)
	 Shining, The (1980)
	 Big Lebowski, The (1998)
	 Sleepless in Seattle (1993)
Cluster #4


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Forrest Gump (1994)
	 Silence of the Lambs, The (1991)
	 Usual Suspects, The (1995)
	 American Beauty (1999)
	 Apollo 13 (1995)
	 Saving Private Ryan (1998)
	 Fargo (1996)
	 Mission: Impossible (1996)
	 Die Hard: With a Vengeance (1995)
	 Princess Bride, The (1987)
Cluster #5


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Pulp Fiction (1994)
	 Toy Story (1995)
	 Dances with Wolves (1990)
	 Beauty and the Beast (1991)
	 E.T. the Extra-Terrestrial (1982)
	 Home Alone (1990)
	 Net, The (1995)
	 2001: A Space Odyssey (1968)
	 Departed, The (2006)
	 Trainspotting (1996)
Cluster #6


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Star Wars: Episode VI - Return of the Jedi (1983)
	 Men in Black (a.k.a. MIB) (1997)
	 Memento (2000)
	 Inception (2010)
	 Star Wars: Episode I - The Phantom Menace (1999)
	 Batman Forever (1995)
	 Terminator, The (1984)
	 Goodfellas (1990)
	 Incredibles, The (2004)
	 Ocean's Eleven (2001)
Cluster #7


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Jurassic Park (1993)
	 Star Wars: Episode V - The Empire Strikes Back (1980)
	 Fugitive, The (1993)
	 Sixth Sense, The (1999)
	 Gladiator (2000)
	 Pirates of the Caribbean: The Curse of the Black Pearl (2003)
	 Titanic (1997)
	 Dumb & Dumber (Dumb and Dumber) (1994)
	 American History X (1998)
	 Aliens (1986)
Cluster #8


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Seven (a.k.a. Se7en) (1995)
	 Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
	 Lord of the Rings: The Fellowship of the Ring, The (2001)
	 Mrs. Doubtfire (1993)
	 Finding Nemo (2003)
	 Blade Runner (1982)
	 Clueless (1995)
	 American Pie (1999)
	 Heat (1995)
	 Office Space (1999)
Cluster #9


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Shawshank Redemption, The (1994)
	 Matrix, The (1999)
	 Braveheart (1995)
	 Lord of the Rings: The Return of the King, The (2003)
	 Shrek (2001)
	 Groundhog Day (1993)
	 Monty Python and the Holy Grail (1975)
	 Kill Bill: Vol. 1 (2003)
	 Twister (1996)
	 Willy Wonka & the Chocolate Factory (1971)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating