In [49]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.metrics.pairwise import cosine_similarity

# Učitavanje MovieLens dataset-a
movies = pd.read_csv('./podaci/movie.csv')
ratings = pd.read_csv('./podaci/ratings_small.csv')

# Stvaranje matrice ocjena
user_movie_ratings = ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

# Pretvorba DataFrame-a u numpy array
user_movie_ratings_np = user_movie_ratings.to_numpy()

# Funkcija za preporuku filmova korisniku
def recommend_movies(user_id):
    # Primjena SVD
    U, sigma, Vt = svds(user_movie_ratings_np, k=50)
    sigma = np.diag(sigma)

    # Generiranje predikcija ocjena za sve filmove
    predicted_ratings = np.dot(np.dot(U, sigma), Vt)
    predicted_ratings_df = pd.DataFrame(predicted_ratings, columns=user_movie_ratings.columns)

    # Pronalaženje sličnih korisnika pomoću kosinusne sličnosti
    user_row = predicted_ratings_df.loc[user_id].values.reshape(1, -1)
    similarity_scores = cosine_similarity(predicted_ratings_df.values, user_row)
    
    # Sortiranje korisnika prema sličnosti
    similar_users = list(enumerate(similarity_scores))
    similar_users = sorted(similar_users, key=lambda x: x[1], reverse=True)[1:11]  # Uzmi top 10 sličnih korisnika

    # Pronalaženje filmova koje korisnik nije ocijenio, a koje su ocijenili slični korisnici
    user_rated_movies = set(ratings[ratings['userId'] == user_id]['movieId'])
    recommended_movies = set()

    for similar_user, _ in similar_users:
        similar_user_rated_movies = set(ratings[ratings['userId'] == similar_user]['movieId'])
        common_movies = similar_user_rated_movies - user_rated_movies
        recommended_movies.update(common_movies)

    # Filtriranje visoko ocijenjenih filmova (preporučujemo samo filmove s ocjenom većom od 3.5)
    high_rated_movies = ratings[(ratings['movieId'].isin(recommended_movies)) & (ratings['rating'] > 3)]

    # Prikazivanje prvih 10 preporučenih filmova
    recommended_movies_info = (
        movies[movies['movieId'].isin(high_rated_movies['movieId'])][['movieId', 'title', 'genres']]
        .head(10)
    )

    return recommended_movies_info

# Primjer: Generiranje preporuka za određenog korisnika (npr., korisnik s ID-om 1)
user_id_to_recommend = 299
recommendations = recommend_movies(user_id_to_recommend)
print(f"Preporuke za korisnika sa ID {user_id_to_recommend}:")
for i, movie in recommendations.iterrows():
    print(f"{i + 1}. {movie['title']} ({movie['genres']})")

Preporuke za korisnika sa ID 299:
1. Toy Story (1995) (Adventure|Animation|Children|Comedy|Fantasy)
2. Jumanji (1995) (Adventure|Children|Fantasy)
3. Grumpier Old Men (1995) (Comedy|Romance)
5. Father of the Bride Part II (1995) (Comedy)
6. Heat (1995) (Action|Crime|Thriller)
7. Sabrina (1995) (Comedy|Romance)
10. GoldenEye (1995) (Action|Adventure|Thriller)
11. American President, The (1995) (Comedy|Drama|Romance)
14. Nixon (1995) (Drama)
15. Cutthroat Island (1995) (Action|Adventure|Romance)


In [29]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Učitajte MovieLens dataset
ratings = pd.read_csv('./podaci/ratings_small.csv')

# Funkcija za preporuku ocjene za stavku I za korisnika U
def predict_rating(user_id, item_id, user_movie_ratings, similarity_matrix, k=5):
    # Pronađi slične korisnike koristeći kosinusnu sličnost
    user_row = user_movie_ratings.loc[user_id].values.reshape(1, -1)
    similarity_scores = cosine_similarity(user_movie_ratings.values, user_row)
    
    # Sortiraj korisnike prema sličnosti
    similar_users = list(enumerate(similarity_scores))
    similar_users = sorted(similar_users, key=lambda x: x[1], reverse=True)[1:k+1]  # Uzmi top k sličnih korisnika
    
    # Izračunaj ponderiranu srednju ocjenu za stavku I koristeći slične korisnike
    numerator = 0
    denominator = 0
    
    for sim_user, sim_score in similar_users:
        if user_movie_ratings.loc[sim_user, item_id] != 0:  # Provjeri je li korisnik ocijenio stavku I
            numerator += sim_score * user_movie_ratings.loc[sim_user, item_id]
            denominator += sim_score
    
    if denominator == 0:
        return 0  # Ako nema sličnih korisnika koji su ocijenili stavku I, vrati 0
    
    predicted_rating = numerator / denominator
    return predicted_rating

# Primjer korištenja
user_id = 1  # Postavite željeni ID korisnika
item_id = 1  # Postavite željeni ID stavke kojoj želite predvidjeti ocjenu

predicted_rating = predict_rating(user_id, item_id, user_movie_ratings, None, k=5)
print(predicted_rating)

# ... (prethodni kod)

# Funkcija za preporuku filmova na temelju predviđenih ocjena
def recommend_movies(user_id, user_movie_ratings, similarity_matrix, k=5, threshold=3):
    # Prazna lista za preporučene filmove
    recommended_movies = []

    # Iteriraj kroz sve stavke koje korisnik nije ocijenio
    for item_id in user_movie_ratings.columns:
        if user_movie_ratings.loc[user_id, item_id] == 0:  # Provjeri je li korisnik ocijenio stavku
            # Predvidi ocjenu korisnika za stavku
            predicted_rating = predict_rating(user_id, item_id, user_movie_ratings, similarity_matrix, k)
            
            # Ako je predviđena ocjena veća od praga (threshold), dodaj stavku među preporučene
            if predicted_rating > threshold:
                recommended_movies.append((item_id, predicted_rating))

    # Sortiraj preporučene filmove prema predviđenim ocjenama (silazno)
    recommended_movies = sorted(recommended_movies, key=lambda x: x[1], reverse=True)

    return recommended_movies[:10]  # Vrati prvih 10 preporučenih filmova

# Primjer korištenja

# Preporuči filmove korisniku
recommended_movies = recommend_movies(user_id, user_movie_ratings, None, k=5, threshold=3)

# Ispis preporučenih filmova
for item_id, predicted_rating in recommended_movies:
    movie_title = movies[movies['movieId'] == item_id]['title'].values[0]
    print(f'Predloženi film: {movie_title}')


[4.]
Predloženi film: Babe (1995)
Predloženi film: Usual Suspects, The (1995)
Predloženi film: Bottle Rocket (1996)
Predloženi film: Blade Runner (1982)
Predloženi film: Wallace & Gromit: A Close Shave (1995)
Predloženi film: Dr. Strangelove or: How I Learned to Stop Worrying and Love the Bomb (1964)
Predloženi film: Godfather, The (1972)
Predloženi film: Fish Called Wanda, A (1988)
Predloženi film: Monty Python's Life of Brian (1979)
Predloženi film: Monty Python and the Holy Grail (1975)


In [46]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

# Učitavanje MovieLens dataset-a (pretpostavlja se da su podaci u obliku userId, movieId, rating)
movies = pd.read_csv('./podaci/movie.csv')
ratings = pd.read_csv('./podaci/ratings_small.csv')

# Konfiguracija čitača za Surprise
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Podijela podataka na skup za treniranje i skup za testiranje
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Inicijalizacija i treniranje SVD modela
svd_model = SVD()
svd_model.fit(trainset)

# Funkcija za preporuku filmova na temelju treniranog SVD modela
def recommend_movies_svd_surprise(user_id, model, top_n=10):
    # Dohvaćanje filmova koje korisnik nije ocijenio
    user_rated_movies = set(ratings[ratings['userId'] == user_id]['movieId'])
    all_movies = set(ratings['movieId'])
    unrated_movies = list(all_movies - user_rated_movies)

    # Generiranje predikcija za sve neocijenjene filmove
    predictions = [model.predict(user_id, movie_id) for movie_id in unrated_movies]

    # Sortiranje predikcija prema ocjenama
    sorted_predictions = sorted(predictions, key=lambda x: x.est, reverse=True)

    # Stvaranje DataFrame-a s preporučenim filmovima
    recommended_movies_info = pd.DataFrame(columns=['movieId', 'title', 'genres', 'est'])

    for prediction in sorted_predictions[:top_n]:
        movie_info = movies[movies['movieId'] == prediction.iid][['movieId', 'title', 'genres']]
        movie_info['est'] = prediction.est
        recommended_movies_info = pd.concat([recommended_movies_info, movie_info], ignore_index=True)

    return recommended_movies_info[['movieId', 'title', 'genres', 'est']]

# Primjer korištenja
user_id_to_recommend = 299  # Postavite željeni ID korisnika
recommendations_svd_surprise = recommend_movies_svd_surprise(user_id_to_recommend, svd_model)

# Ispis preporučenih filmova
for _, row in recommendations_svd_surprise.iterrows():
    movie_id = int(row['movieId'])
    title = row['title']
    genres = row['genres']
    print(f"ID: {movie_id}, Naslov: {title} ({genres})")

ID: 318, Naslov: Shawshank Redemption, The (1994) (Crime|Drama)
ID: 1172, Naslov: Cinema Paradiso (Nuovo cinema Paradiso) (1989) (Drama)
ID: 1197, Naslov: Princess Bride, The (1987) (Action|Adventure|Comedy|Fantasy|Romance)
ID: 58559, Naslov: Dark Knight, The (2008) (Action|Crime|Drama|IMAX)
ID: 50, Naslov: Usual Suspects, The (1995) (Crime|Mystery|Thriller)
ID: 899, Naslov: Singin' in the Rain (1952) (Comedy|Musical|Romance)
ID: 926, Naslov: All About Eve (1950) (Drama)
ID: 4226, Naslov: Memento (2000) (Mystery|Thriller)
ID: 6016, Naslov: City of God (Cidade de Deus) (2002) (Action|Adventure|Crime|Drama|Thriller)
ID: 858, Naslov: Godfather, The (1972) (Crime|Drama)


In [47]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split

# Učitavanje MovieLens dataset-a (pretpostavlja se da su podaci u obliku userId, movieId, rating)
movies = pd.read_csv('./podaci/movie.csv')
ratings = pd.read_csv('./podaci/ratings_small.csv')

# Konfiguracija čitača za Surprise
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Podijela podataka na skup za treniranje i skup za testiranje
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Inicijalizacija i treniranje KNN modela
knn_model = KNNBasic(sim_options={'user_based': True})  # user_based postavljen na True za sustav preporuke temeljen na korisnicima
knn_model.fit(trainset)

# Funkcija za preporuku filmova na temelju treniranog KNN modela
def recommend_movies_knn_surprise(user_id, model, top_n=10):
    # Dohvaćanje filmova koje korisnik nije ocijenio
    user_rated_movies = set(ratings[ratings['userId'] == user_id]['movieId'])
    all_movies = set(ratings['movieId'])
    unrated_movies = list(all_movies - user_rated_movies)

    # Generiranje preporuka za korisnika
    recommendations = model.get_neighbors(user_id, k=top_n)

    # Prikazivanje informacija o preporučenim filmovima
    recommended_movies_info = movies[movies['movieId'].isin(recommendations)][['movieId', 'title', 'genres']]

    return recommended_movies_info

# Primjer korištenja
user_id_to_recommend = 299  # Postavite željeni ID korisnika
recommendations_knn_surprise = recommend_movies_knn_surprise(user_id_to_recommend, knn_model)

# Ispis preporučenih filmova
for _, row in recommendations_knn_surprise.iterrows():
    movie_id = int(row['movieId'])
    title = row['title']
    genres = row['genres']
    print(f"ID: {movie_id}, Naslov: {title} ({genres})")

Computing the msd similarity matrix...
Done computing similarity matrix.
ID: 8, Naslov: Tom and Huck (1995) (Adventure|Children)
ID: 32, Naslov: Twelve Monkeys (a.k.a. 12 Monkeys) (1995) (Mystery|Sci-Fi|Thriller)
ID: 42, Naslov: Dead Presidents (1995) (Action|Crime|Drama)
ID: 57, Naslov: Home for the Holidays (1995) (Drama)
ID: 65, Naslov: Bio-Dome (1996) (Comedy)
ID: 97, Naslov: Hate (Haine, La) (1995) (Crime|Drama)
ID: 115, Naslov: Happiness Is in the Field (Bonheur est dans le pré, Le) (1995) (Comedy)
ID: 145, Naslov: Bad Boys (1995) (Action|Comedy|Crime|Drama|Thriller)
ID: 201, Naslov: Three Wishes (1995) (Drama|Fantasy)


In [43]:
from sklearn.metrics import mean_squared_error

from surprise.model_selection import cross_validate

# Funkcija za preporuku filmova na temelju treniranog modela
def recommend_and_evaluate(model, user_id, top_n=10):
    # Preporuka
    recommendations = []
    if isinstance(model, SVD):
        recommendations = recommend_movies_svd_surprise(user_id, model, top_n)
    elif isinstance(model, KNNBasic):
        recommendations = recommend_movies_knn_surprise(user_id, model, top_n)
    else:
        raise ValueError("Nepodržani model")

    # Evaluacija unakrsnom validacijom
    results = cross_validate(model, data, measures=['MAE', 'RMSE'], cv=5, verbose=False)

    # Ispis rezultata unakrsne validacije
    print(f'Rezultati unakrsne validacije za model {model.__class__.__name__}:')
    print(f"MAE: {np.mean(results['test_mae']):.4f}")
    print(f"RMSE: {np.mean(results['test_rmse']):.4f}")

    # Evaluacija preporuka
    predictions = [model.predict(user_id, movie_id).est for movie_id in recommendations['movieId']]
    true_ratings = [
        float(ratings.loc[(ratings['userId'] == user_id) & (ratings['movieId'] == movie_id), 'rating'].iloc[0])
        if ratings[(ratings['userId'] == user_id) & (ratings['movieId'] == movie_id)].shape[0] > 0
        else 0  # Ako nema ocjene, postavi na 0
        for movie_id in recommendations['movieId']
    ]

    # Ispis evaluacije preporuka
    mse = mean_squared_error(true_ratings, predictions)
    print(f'MSE za korisnika {user_id}: {mse:.4f}')

# Unakrsna validacija za SVD model
recommend_and_evaluate(svd_model, user_id_to_recommend)

# Unakrsna validacija za KNN model
recommend_and_evaluate(knn_model, user_id_to_recommend)

  recommended_movies_info = pd.concat([recommended_movies_info, movie_info], ignore_index=True)


Rezultati unakrsne validacije za model SVD:
MAE: 0.6904
RMSE: 0.8965
MSE za korisnika 1: 12.6758
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Rezultati unakrsne validacije za model KNNBasic:
MAE: 0.7442
RMSE: 0.9677
MSE za korisnika 1: 12.5151


In [19]:
import pandas as pd
import numpy as np
ratings = pd.read_csv('./podaci/ratings_small.csv')

from surprise import KNNWithMeans
from surprise import SVD

# k Nearest Neighbour
similarity = {
    "name": "cosine",
    "user_based": False,  # item-based similarity
}
algo_KNN = KNNWithMeans(sim_options = similarity)


# SVD
algo_SVD = SVD()

movie_rating_set = pd.crosstab(index = ratings.userId, columns = ratings.movieId, values = ratings.rating, aggfunc = np.mean)

from surprise import Dataset
from surprise import Reader

# load df into Surprise Reader object
reader = Reader(rating_scale = (0,5))
rating_df = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

from surprise.model_selection import cross_validate

cross_validate_KNN = cross_validate(algo_KNN, rating_df, measures=['RMSE', 'MAE'], cv=5, verbose=True)

cross_validate_SVD = cross_validate(algo_SVD, rating_df, measures=['RMSE', 'MAE'], cv=5, verbose=True)

from surprise.model_selection import train_test_split
from surprise import accuracy

# define train test function
def train_test_algo(algo, label):
    training_set, testing_set = train_test_split(rating_df, test_size = 0.2)
    algo.fit(training_set)
    test_output = algo.test(testing_set)
    test_df = pd.DataFrame(test_output)
    
    print("RMSE -",label, accuracy.rmse(test_output, verbose = False))
    print("MAE -", label, accuracy.mae(test_output, verbose=False))
    print("MSE -", label, accuracy.mse(test_output, verbose=False))
    
    return test_df

train_test_KNN = train_test_algo(algo_KNN, "algo_KNN")
print(train_test_KNN.head())

train_test_SVD = train_test_algo(algo_SVD, "algo_SVD")
print(train_test_SVD.head())

movie_df = pd.read_csv('./podaci/movie.csv')

def prediction(algo, users_K):
    pred_list = []
    for userId in range(1,users_K):
        for movieId in range(1,9067):
            rating = algo.predict(userId, movieId).est
            pred_list.append([userId, movieId, rating])
    pred_df = pd.DataFrame(pred_list, columns = ['userId', 'movieId', 'rating'])
    return pred_df

def top_recommendations(pred_df, top_N):
    recommended_movie = pd.merge(pred_df, movie_df, how='inner', left_on='movieId', right_on='movieId')
    sorted_df = recommended_movie.groupby(('userId'), as_index = False).apply(lambda x: x.sort_values(['rating'], ascending = False)).reset_index(drop=True)
    top_recommended_movies = sorted_df.groupby('userId').head(top_N)
    return sorted_df, top_recommended_movies

pred_KNN = prediction(algo_KNN, 10)
recommended_movies_KNN, top_recommended_movies_KNN = top_recommendations(pred_KNN, 3)

## SVD predictions
pred_SVD = prediction(algo_SVD, 10)
recommended_movies_SVD, top_recommended_movies_SVD = top_recommendations(pred_SVD, 3)

  movie_rating_set = pd.crosstab(index = ratings.userId, columns = ratings.movieId, values = ratings.rating, aggfunc = np.mean)


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9373  0.9260  0.9277  0.9295  0.9247  0.9291  0.0044  
MAE (testset)     0.7159  0.7072  0.7097  0.7148  0.7092  0.7114  0.0034  
Fit time          5.25    5.37    5.24    5.97    5.25    5.42    0.28    
Test time         4.10    4.30    4.26    4.63    4.38    4.33    0.17    
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9008  0.9006  0.8953  0.8939  0.8

In [20]:
top_recommended_movies_KNN

Unnamed: 0,userId,movieId,rating,title,genres
0,1,3966,5.0,Detour (1945),Crime|Film-Noir
1,1,8699,5.0,Dancing in September (2000),Drama
2,1,2562,5.0,Bandits (1997),Drama
8337,2,4755,5.0,Wish Upon a Star (1996),Comedy
8338,2,565,5.0,Cronos (1993),Drama|Horror
8339,2,4252,5.0,"Circle, The (Dayereh) (2000)",Drama
16674,3,8522,5.0,My Little Chickadee (1940),Comedy|Western
16675,3,559,5.0,"Paris, France (1993)",Comedy
16676,3,4201,5.0,"End, The (1978)",Comedy
25011,4,1178,5.0,Paths of Glory (1957),Drama|War


In [21]:
top_recommended_movies_SVD

Unnamed: 0,userId,movieId,rating,title,genres
0,1,2064,3.870748,Roger & Me (1989),Documentary
1,1,1172,3.866015,Cinema Paradiso (Nuovo cinema Paradiso) (1989),Drama
2,1,905,3.737295,It Happened One Night (1934),Comedy|Romance
8337,2,969,4.605314,"African Queen, The (1951)",Adventure|Comedy|Romance|War
8338,2,858,4.492449,"Godfather, The (1972)",Crime|Drama
8339,2,953,4.441905,It's a Wonderful Life (1946),Drama|Fantasy|Romance
16674,3,318,4.469003,"Shawshank Redemption, The (1994)",Crime|Drama
16675,3,608,4.383267,Fargo (1996),Comedy|Crime|Drama|Thriller
16676,3,969,4.273866,"African Queen, The (1951)",Adventure|Comedy|Romance|War
25011,4,1198,5.0,Raiders of the Lost Ark (Indiana Jones and the...,Action|Adventure


In [22]:
recommended_movies_KNN

Unnamed: 0,userId,movieId,rating,title,genres
0,1,3966,5.000000,Detour (1945),Crime|Film-Noir
1,1,8699,5.000000,Dancing in September (2000),Drama
2,1,2562,5.000000,Bandits (1997),Drama
3,1,2897,5.000000,And the Ship Sails On (E la nave va) (1983),Comedy|War
4,1,6033,5.000000,Mystery Date (1991),Comedy
...,...,...,...,...,...
75028,9,3883,0.500000,Catfish in Black Bean Sauce (2000),Comedy|Drama
75029,9,7312,0.500000,"Follow Me, Boys! (1966)",Comedy|Drama
75030,9,5672,0.463828,Pokemon 4 Ever (a.k.a. Pokémon 4: The Movie) (...,Adventure|Animation|Children|Fantasy
75031,9,4559,0.419428,Vice Versa (1988),Comedy


In [23]:
recommended_movies_SVD

Unnamed: 0,userId,movieId,rating,title,genres
0,1,2064,3.870748,Roger & Me (1989),Documentary
1,1,1172,3.866015,Cinema Paradiso (Nuovo cinema Paradiso) (1989),Drama
2,1,905,3.737295,It Happened One Night (1934),Comedy|Romance
3,1,2959,3.701342,Fight Club (1999),Action|Crime|Drama|Thriller
4,1,50,3.687542,"Usual Suspects, The (1995)",Crime|Mystery|Thriller
...,...,...,...,...,...
75028,9,65,2.280790,Bio-Dome (1996),Comedy
75029,9,193,2.268027,Showgirls (1995),Drama
75030,9,1556,2.118779,Speed 2: Cruise Control (1997),Action|Romance|Thriller
75031,9,546,2.118417,Super Mario Bros. (1993),Action|Adventure|Children|Comedy|Fantasy|Sci-Fi
