<a href="https://colab.research.google.com/github/chiranjeevimacharla90/Machine-learning-using-python/blob/main/Singular%20Value%20Decomposition%20(SVD)%20/Movie_Recommendation_System_Using_SVD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Movie Recommendation System Using SVD**

### Importing the basic libraries

In [1]:
import numpy as np
import pandas as pd

In [2]:
from google.colab import files
uploaded = files.upload()



Saving movies.dat to movies.dat
Saving ratings.dat to ratings.dat


### Importing & Parsing the dataset as ratings and movies details

In [8]:
ratingData = pd.io.parsers.read_csv('ratings.dat', 
    names=['user_id', 'movie_id', 'rating', 'time'], engine='python', delimiter='::',encoding='ISO-8859-1')
movieData = pd.io.parsers.read_csv('movies.dat',
    names=['movie_id', 'title', 'genre'],engine='python', delimiter='::',encoding='ISO-8859-1')
print(ratingData)

         user_id  movie_id  rating       time
0              1      1193       5  978300760
1              1       661       3  978302109
2              1       914       3  978301968
3              1      3408       4  978300275
4              1      2355       5  978824291
...          ...       ...     ...        ...
1000204     6040      1091       1  956716541
1000205     6040      1094       5  956704887
1000206     6040       562       5  956704746
1000207     6040      1096       4  956715648
1000208     6040      1097       4  956715569

[1000209 rows x 4 columns]


### Create the ratings matrix of shape (m×u)

In [9]:
ratingMatrix = np.ndarray(
    shape=(np.max(ratingData.movie_id.values), np.max(ratingData.user_id.values)),
    dtype=np.uint8)
ratingMatrix[ratingData.movie_id.values-1, ratingData.user_id.values-1] = ratingData.rating.values
print(ratingMatrix)

[[  5  86 213 ...  39 205   3]
 [  0   0   0 ...  39 205  65]
 [  0   0   0 ...  41 205  65]
 ...
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]
 [  0   0   0 ...   0   0   0]]


### Subtract Mean off - Normalization

In [10]:
normalizedMatrix = ratingMatrix - np.asarray([(np.mean(ratingMatrix, 1))]).T
print(normalizedMatrix)

[[-4.86816225e+01  3.23183775e+01  1.59318377e+02 ... -1.46816225e+01
   1.51318377e+02 -5.06816225e+01]
 [-6.37061258e+01 -6.37061258e+01 -6.37061258e+01 ... -2.47061258e+01
   1.41293874e+02  1.29387417e+00]
 [-7.46185430e+01 -7.46185430e+01 -7.46185430e+01 ... -3.36185430e+01
   1.30381457e+02 -9.61854305e+00]
 ...
 [-3.27814570e-02 -3.27814570e-02 -3.27814570e-02 ... -3.27814570e-02
  -3.27814570e-02 -3.27814570e-02]
 [-2.58278146e-02 -2.58278146e-02 -2.58278146e-02 ... -2.58278146e-02
  -2.58278146e-02 -2.58278146e-02]
 [-2.42880795e-01 -2.42880795e-01 -2.42880795e-01 ... -2.42880795e-01
  -2.42880795e-01 -2.42880795e-01]]


### Computing SVD

In [11]:
A = normalizedMatrix.T / np.sqrt(ratingMatrix.shape[0] - 1)
U, S, V = np.linalg.svd(A)

### Calculate cosine similarity, sort by most similar and return the top N

In [12]:
def similar(ratingData, movie_id, top_n):
    index = movie_id - 1 # Movie id starts from 1
    movie_row = ratingData[index, :]
    magnitude = np.sqrt(np.einsum('ij, ij -> i', ratingData, ratingData)) #Einstein summation |  traditional matrix multiplication and is equivalent to np.matmul(a,b)
    similarity = np.dot(movie_row, ratingData.T) / (magnitude[index] * magnitude)
    sort_indexes = np.argsort(-similarity) #Perform an indirect sort along the given axis (Last axis)
    return sort_indexes[:top_n]

### Select k principal components to represent the movies, a movie_id to find recommendations and print the top_n results

In [13]:
k = 50
movie_id = 2
top_n = 5

sliced = V.T[:, :k] # representative data
indexes = similar(sliced, movie_id, top_n)

print('Recommendations for Movie {0}: \n'.format(
movieData[movieData.movie_id == movie_id].title.values[0]))
for id in indexes + 1:
    print(movieData[movieData.movie_id == id].title.values[0])

Recommendations for Movie Jumanji (1995): 

Jumanji (1995)
Sunset Strip (2000)
White Boys (1999)
Identification of a Woman (Identificazione di una donna) (1982)
Little City (1998)


  similarity = np.dot(movie_row, ratingData.T) / (magnitude[index] * magnitude)
