In [5]:
from scipy.io import loadmat
import numpy as np
import matplotlib.pyplot as plt

In [12]:
def loadMovieList(path='/home/Course_GoIT/Data_Science/Home_Work_7/movie_ids.txt'):
    """
    Read a movie list file and return the movie names in file order.

    Every line is split on whitespace; the first token is dropped and the
    remaining tokens are re-joined with single spaces to form the name.
    A blank line therefore yields an empty string, which keeps list
    positions aligned with line numbers.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the movie list file. Defaults to the movie_ids.txt
        location originally hard-coded in this function, so existing
        zero-argument calls keep working.

    Returns
    -------
    movieNames : list
        A list of strings, one per line of the file.
    """
    # The file is decoded as ISO-8859-1, as in the original implementation.
    with open(path, encoding='ISO-8859-1') as fid:
        # split() already discards surrounding whitespace and the newline,
        # so no extra strip() is needed after joining.
        return [' '.join(line.split()[1:]) for line in fid]

In [None]:
# Movie titles in file order: names[i] is the title taken from line i + 1 of the movie list.
names = loadMovieList()

In [None]:
# Load data
data = loadmat('/home/Course_GoIT/Data_Science/Home_Work_7/movies.mat')
Y, R = data['Y'], data['R']

# Y is a 1682x943 matrix containing the ratings (1-5) given to
# 1682 movies by 943 users.

# R is a 1682x943 matrix where R[i, j] = 1
# if and only if user j gave a rating to movie i.

# From the matrix, we can compute statistics like the average rating.
# The mask has to come from the same row (movie) as the ratings, and it has to
# be a boolean array: if R is a numeric 0/1 array, indexing Y with it directly
# would select columns 0 and 1 instead of the rated entries.
movie_idx = 1600
rated_mask = R[movie_idx, :] == 1
print('Average rating for movie 1601 (',names[movie_idx] ,'): %f / 5' %
      np.mean(Y[movie_idx, rated_mask]))

# We can "visualize" the ratings matrix by plotting it with imshow
plt.figure(figsize=(8, 8))
plt.imshow(Y)
plt.ylabel('Movies')
plt.xlabel('Users')
plt.grid(False)

In [9]:
def cofiCostFunc(params, Y, R, num_users, num_movies, num_features, lambda_=0.0):
    """
    Regularized collaborative-filtering cost and its gradient.

    Parameters
    ----------
    params : numpy array
        Flat parameter vector. The first num_movies * num_features entries
        are reshaped into X (num_movies x num_features); the remaining
        entries are reshaped into Theta (num_users x num_features).
    Y : array
        Ratings, compared element-wise against X @ Theta.T.
    R : array
        Element-wise weight applied to the prediction error (entries that
        are 0 do not contribute to the cost or the gradient).
    num_users, num_movies, num_features : int
        Dimensions used to unpack `params`.
    lambda_ : float, optional
        Regularization strength applied to both X and Theta.

    Returns
    -------
    J : float
        Half the sum of squared masked errors plus the regularization term.
    grad : numpy array
        Gradient with respect to X followed by the gradient with respect
        to Theta, flattened in the same layout as `params`.
    """
    # Unpack the flat vector into the two factor matrices.
    boundary = num_movies * num_features
    X = params[:boundary].reshape((num_movies, num_features))
    Theta = params[boundary:].reshape((num_users, num_features))

    # Prediction error, zeroed wherever R is 0.
    masked_err = (X @ Theta.T - Y) * R

    squared_error_term = 0.5 * np.sum(np.square(masked_err))
    penalty_term = (lambda_ / 2.0) * (np.sum(np.square(Theta)) + np.sum(np.square(X)))
    J = squared_error_term + penalty_term

    X_grad = masked_err @ Theta + lambda_ * X
    Theta_grad = masked_err.T @ X + lambda_ * Theta

    return J, np.concatenate((X_grad.ravel(), Theta_grad.ravel()))

In [None]:
# Movie titles as a list, indexed by zero-based position in the movie list file.
movieList = loadMovieList()

In [None]:
# Zero-based positions in movieList of the titles to display.
movies_id_list = [0, 97, 6, 11, 53, 63, 65, 68, 182, 225, 354]

# Print one title per line, in the order listed above.
for movie_idx in movies_id_list:
    print(movieList[movie_idx])

In [None]:
pip install surprise

In [None]:
import pandas as pd
from surprise import SVD
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split

# Fixed seed so the train/test split and the fitted SVD model are reproducible.
SEED = 42

# Y and R are laid out movies x users (rows = movies, columns = users), so for
# every rated cell the row index is the movie and the column index is the user.
movie_idx, user_idx = np.nonzero(R == 1)
user_movie_ratings = list(zip(user_idx.tolist(),
                              movie_idx.tolist(),
                              Y[movie_idx, user_idx].tolist()))

df = pd.DataFrame(user_movie_ratings, columns=['user', 'item', 'rating'])

# When loading from a DataFrame the Reader is only consulted for the rating scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(df[['user', 'item', 'rating']], reader=reader)

trainset, testset = train_test_split(data, test_size=0.2, random_state=SEED)

model = SVD(random_state=SEED)

model.fit(trainset)

user_id = 1  # zero-based column index of the user in Y / R
N = 10

# Score every movie (one per row of Y) for this user; the third tuple element
# is only a placeholder for the unknown true rating.
num_movies = Y.shape[0]
user_ratings = model.test([(user_id, movie_id, 0) for movie_id in range(num_movies)])

top_n = sorted(user_ratings, key=lambda x: x.est, reverse=True)[:N]

for i, prediction in enumerate(top_n):
    print(f"{i + 1}: Фільм з ідентифікатором {prediction.iid}, Прогнозована оцінка: {prediction.est}")