In [71]:
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
import pandas as pd

# Učitavanje MovieLens dataset-a (pretpostavlja se da su podaci u obliku userId, movieId, rating)
movies = pd.read_csv('./podaci/movie.csv')
ratings = pd.read_csv('./podaci/ratings_small.csv')

# Konfiguracija čitača za Surprise
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Podijela podataka na skup za treniranje i skup za testiranje
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Inicijalizacija i treniranje SVD modela
svd_model = SVD()
svd_model.fit(trainset)

# Funkcija za preporuku filmova na temelju treniranog SVD modela
def recommend_movies_svd_surprise(user_id, model, top_n=10):
    # Dohvaćanje filmova koje korisnik nije ocijenio
    user_rated_movies = set(ratings[ratings['userId'] == user_id]['movieId'])
    all_movies = set(ratings['movieId'])
    unrated_movies = list(all_movies - user_rated_movies)

    # Generiranje predikcija za sve neocijenjene filmove
    predictions = [model.predict(user_id, movie_id) for movie_id in unrated_movies]

    # Sortiranje predikcija prema ocjenama
    sorted_predictions = sorted(predictions, key=lambda x: x.est, reverse=True)

    # Stvaranje DataFrame-a s preporučenim filmovima
    recommended_movies_info = pd.DataFrame(columns=['movieId', 'title', 'genres', 'est'])

    for prediction in sorted_predictions[:top_n]:
        movie_info = movies[movies['movieId'] == prediction.iid][['movieId', 'title', 'genres']]
        movie_info['est'] = prediction.est
        recommended_movies_info = pd.concat([recommended_movies_info, movie_info], ignore_index=True)

    return recommended_movies_info[['movieId', 'title', 'genres', 'est']]

# Primjer korištenja
user_id_to_recommend = 600  # Postavite željeni ID korisnika
recommendations_svd_surprise = recommend_movies_svd_surprise(user_id_to_recommend, svd_model)

# Ispis preporučenih filmova
for _, row in recommendations_svd_surprise.iterrows():
    movie_id = int(row['movieId'])
    title = row['title']
    genres = row['genres']
    print(f"ID: {movie_id}, Naslov: {title} ({genres})")

ID: 926, Naslov: All About Eve (1950) (Drama)
ID: 969, Naslov: African Queen, The (1951) (Adventure|Comedy|Romance|War)
ID: 858, Naslov: Godfather, The (1972) (Crime|Drama)
ID: 1172, Naslov: Cinema Paradiso (Nuovo cinema Paradiso) (1989) (Drama)
ID: 111, Naslov: Taxi Driver (1976) (Crime|Drama|Thriller)
ID: 905, Naslov: It Happened One Night (1934) (Comedy|Romance)
ID: 3462, Naslov: Modern Times (1936) (Comedy|Drama|Romance)
ID: 318, Naslov: Shawshank Redemption, The (1994) (Crime|Drama)
ID: 6016, Naslov: City of God (Cidade de Deus) (2002) (Action|Adventure|Crime|Drama|Thriller)
ID: 1252, Naslov: Chinatown (1974) (Crime|Film-Noir|Mystery|Thriller)


  recommended_movies_info = pd.concat([recommended_movies_info, movie_info], ignore_index=True)


In [66]:
from surprise import Dataset, Reader, KNNBasic
from surprise.model_selection import train_test_split

# Učitavanje MovieLens dataset-a (pretpostavlja se da su podaci u obliku userId, movieId, rating)
movies = pd.read_csv('./podaci/movie.csv')
ratings = pd.read_csv('./podaci/ratings_small.csv')

# Konfiguracija čitača za Surprise
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

# Podijela podataka na skup za treniranje i skup za testiranje
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

# Inicijalizacija i treniranje KNN modela
knn_model = KNNBasic(sim_options={'user_based': True})  # user_based postavljen na True za sustav preporuke temeljen na korisnicima
knn_model.fit(trainset)

# Funkcija za preporuku filmova na temelju treniranog KNN modela
def recommend_movies_knn_surprise(user_id, model, top_n=10):
    # Dohvaćanje filmova koje korisnik nije ocijenio
    user_rated_movies = set(ratings[ratings['userId'] == user_id]['movieId'])
    all_movies = set(ratings['movieId'])
    unrated_movies = list(all_movies - user_rated_movies)

    # Generiranje preporuka za korisnika
    recommendations = model.get_neighbors(user_id, k=top_n)

    # Prikazivanje informacija o preporučenim filmovima
    recommended_movies_info = movies[movies['movieId'].isin(recommendations)][['movieId', 'title', 'genres']]

    return recommended_movies_info

# Primjer korištenja
user_id_to_recommend = 500  # Postavite željeni ID korisnika
recommendations_knn_surprise = recommend_movies_knn_surprise(user_id_to_recommend, knn_model)

# Ispis preporučenih filmova
for _, row in recommendations_knn_surprise.iterrows():
    movie_id = int(row['movieId'])
    title = row['title']
    genres = row['genres']
    print(f"ID: {movie_id}, Naslov: {title} ({genres})")

Computing the msd similarity matrix...
Done computing similarity matrix.
ID: 51, Naslov: Guardian Angel (1994) (Action|Drama|Thriller)
ID: 296, Naslov: Pulp Fiction (1994) (Comedy|Crime|Drama|Thriller)
ID: 365, Naslov: Little Buddha (1993) (Drama)
ID: 368, Naslov: Maverick (1994) (Adventure|Comedy|Western)
ID: 428, Naslov: Bronx Tale, A (1993) (Drama)
ID: 480, Naslov: Jurassic Park (1993) (Action|Adventure|Sci-Fi|Thriller)
ID: 538, Naslov: Six Degrees of Separation (1993) (Drama)
ID: 545, Naslov: Harem (1985) (Drama)
ID: 611, Naslov: Hellraiser: Bloodline (1996) (Action|Horror|Sci-Fi)


In [43]:
from sklearn.metrics import mean_squared_error

from surprise.model_selection import cross_validate

# Funkcija za preporuku filmova na temelju treniranog modela
def recommend_and_evaluate(model, user_id, top_n=10):
    # Preporuka
    recommendations = []
    if isinstance(model, SVD):
        recommendations = recommend_movies_svd_surprise(user_id, model, top_n)
    elif isinstance(model, KNNBasic):
        recommendations = recommend_movies_knn_surprise(user_id, model, top_n)
    else:
        raise ValueError("Nepodržani model")

    # Evaluacija unakrsnom validacijom
    results = cross_validate(model, data, measures=['MAE', 'RMSE'], cv=5, verbose=False)

    # Ispis rezultata unakrsne validacije
    print(f'Rezultati unakrsne validacije za model {model.__class__.__name__}:')
    print(f"MAE: {np.mean(results['test_mae']):.4f}")
    print(f"RMSE: {np.mean(results['test_rmse']):.4f}")

    # Evaluacija preporuka
    predictions = [model.predict(user_id, movie_id).est for movie_id in recommendations['movieId']]
    true_ratings = [
        float(ratings.loc[(ratings['userId'] == user_id) & (ratings['movieId'] == movie_id), 'rating'].iloc[0])
        if ratings[(ratings['userId'] == user_id) & (ratings['movieId'] == movie_id)].shape[0] > 0
        else 0  # Ako nema ocjene, postavi na 0
        for movie_id in recommendations['movieId']
    ]

    # Ispis evaluacije preporuka
    mse = mean_squared_error(true_ratings, predictions)
    print(f'MSE za korisnika {user_id}: {mse:.4f}')

# Unakrsna validacija za SVD model
recommend_and_evaluate(svd_model, user_id_to_recommend)

# Unakrsna validacija za KNN model
recommend_and_evaluate(knn_model, user_id_to_recommend)

  recommended_movies_info = pd.concat([recommended_movies_info, movie_info], ignore_index=True)


Rezultati unakrsne validacije za model SVD:
MAE: 0.6904
RMSE: 0.8965
MSE za korisnika 1: 12.6758
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Rezultati unakrsne validacije za model KNNBasic:
MAE: 0.7442
RMSE: 0.9677
MSE za korisnika 1: 12.5151


In [73]:
import pandas as pd
import numpy as np
ratings = pd.read_csv('./podaci/ratings_small.csv')

from surprise import KNNWithMeans
from surprise import SVD

# k Nearest Neighbour
similarity = {
    "name": "cosine",
    "user_based": False,  # item-based similarity
}
algo_KNN = KNNWithMeans(sim_options = similarity)


# SVD
algo_SVD = SVD()

movie_rating_set = pd.crosstab(index = ratings.userId, columns = ratings.movieId, values = ratings.rating, aggfunc = np.mean)

from surprise import Dataset
from surprise import Reader

# load df into Surprise Reader object
reader = Reader(rating_scale = (0,5))
rating_df = Dataset.load_from_df(ratings[['userId', 'movieId', 'rating']], reader)

from surprise.model_selection import cross_validate

cross_validate_KNN = cross_validate(algo_KNN, rating_df, measures=['RMSE', 'MAE'], cv=5, verbose=True)

cross_validate_SVD = cross_validate(algo_SVD, rating_df, measures=['RMSE', 'MAE'], cv=5, verbose=True)

from surprise.model_selection import train_test_split
from surprise import accuracy

# define train test function
def train_test_algo(algo, label):
    training_set, testing_set = train_test_split(rating_df, test_size = 0.2)
    algo.fit(training_set)
    test_output = algo.test(testing_set)
    test_df = pd.DataFrame(test_output)
    
    print("RMSE -",label, accuracy.rmse(test_output, verbose = False))
    print("MAE -", label, accuracy.mae(test_output, verbose=False))
    print("MSE -", label, accuracy.mse(test_output, verbose=False))
    
    return test_df

train_test_KNN = train_test_algo(algo_KNN, "algo_KNN")
print(train_test_KNN.head())

train_test_SVD = train_test_algo(algo_SVD, "algo_SVD")
print(train_test_SVD.head())

movie_df = pd.read_csv('./podaci/movie.csv')

def prediction(algo, users_K):
    pred_list = []
    for userId in range(1,users_K):
        for movieId in range(1,9067):
            rating = algo.predict(userId, movieId).est
            pred_list.append([userId, movieId, rating])
    pred_df = pd.DataFrame(pred_list, columns = ['userId', 'movieId', 'rating'])
    return pred_df

def top_recommendations(pred_df, top_N):
    recommended_movie = pd.merge(pred_df, movie_df, how='inner', left_on='movieId', right_on='movieId')
    sorted_df = recommended_movie.groupby(('userId'), as_index = False).apply(lambda x: x.sort_values(['rating'], ascending = False)).reset_index(drop=True)
    top_recommended_movies = sorted_df.groupby('userId').head(top_N)
    return sorted_df, top_recommended_movies

pred_KNN = prediction(algo_KNN, 10)
recommended_movies_KNN, top_recommended_movies_KNN = top_recommendations(pred_KNN, 3)

## SVD predictions
pred_SVD = prediction(algo_SVD, 10)
recommended_movies_SVD, top_recommended_movies_SVD = top_recommendations(pred_SVD, 3)

  movie_rating_set = pd.crosstab(index = ratings.userId, columns = ratings.movieId, values = ratings.rating, aggfunc = np.mean)


Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.9269  0.9311  0.9340  0.9131  0.9339  0.9278  0.0078  
MAE (testset)     0.7071  0.7124  0.7138  0.7002  0.7175  0.7102  0.0060  
Fit time          5.30    5.53    5.52    5.47    5.53    5.47    0.09    
Test time         4.72    4.39    4.64    4.46    4.54    4.55    0.12    
Evaluating RMSE, MAE of algorithm SVD on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8986  0.8938  0.8918  0.8983  0.9

In [75]:
top_recommended_movies_KNN

Unnamed: 0,userId,movieId,rating,title,genres
0,1,5229,5.0,I Think I Do (1997),Comedy
1,1,6033,5.0,Mystery Date (1991),Comedy
2,1,4591,5.0,Erik the Viking (1989),Adventure|Comedy|Fantasy
8337,2,1531,5.0,Losing Chase (1996),Drama
8338,2,2945,5.0,Mike's Murder (1984),Mystery
8339,2,764,5.0,Heavy (1995),Drama|Romance
16674,3,4626,5.0,Miracle Mile (1989),Drama|Romance|Sci-Fi
16675,3,2201,5.0,"Paradine Case, The (1947)",Drama
16676,3,3112,5.0,'night Mother (1986),Drama
25011,4,4405,5.0,"Last Laugh, The (Letzte Mann, Der) (1924)",Drama


In [76]:
top_recommended_movies_SVD

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1221,3.848289,"Godfather: Part II, The (1974)",Crime|Drama
1,1,858,3.745083,"Godfather, The (1972)",Crime|Drama
2,1,1089,3.724464,Reservoir Dogs (1992),Crime|Mystery|Thriller
8337,2,318,4.485963,"Shawshank Redemption, The (1994)",Crime|Drama
8338,2,5618,4.445651,Spirited Away (Sen to Chihiro no kamikakushi) ...,Adventure|Animation|Fantasy
8339,2,969,4.424553,"African Queen, The (1951)",Adventure|Comedy|Romance|War
16674,3,318,4.407586,"Shawshank Redemption, The (1994)",Crime|Drama
16675,3,3462,4.379288,Modern Times (1936),Comedy|Drama|Romance
16676,3,1197,4.376543,"Princess Bride, The (1987)",Action|Adventure|Comedy|Fantasy|Romance
25011,4,899,5.0,Singin' in the Rain (1952),Comedy|Musical|Romance


In [77]:
recommended_movies_KNN

Unnamed: 0,userId,movieId,rating,title,genres
0,1,5229,5.000000,I Think I Do (1997),Comedy
1,1,6033,5.000000,Mystery Date (1991),Comedy
2,1,4591,5.000000,Erik the Viking (1989),Adventure|Comedy|Fantasy
3,1,4796,5.000000,"Grass Is Greener, The (1960)",Comedy|Romance
4,1,4252,5.000000,"Circle, The (Dayereh) (2000)",Drama
...,...,...,...,...,...
75028,9,4051,0.346876,Horrors of Spider Island (Ein Toter Hing im Ne...,Horror|Sci-Fi
75029,9,4559,0.338496,Vice Versa (1988),Comedy
75030,9,1636,0.338496,Stag (1997),Action|Thriller
75031,9,439,0.338496,Dangerous Game (1993),Drama


In [78]:
recommended_movies_SVD

Unnamed: 0,userId,movieId,rating,title,genres
0,1,1221,3.848289,"Godfather: Part II, The (1974)",Crime|Drama
1,1,858,3.745083,"Godfather, The (1972)",Crime|Drama
2,1,1089,3.724464,Reservoir Dogs (1992),Crime|Mystery|Thriller
3,1,296,3.705839,Pulp Fiction (1994),Comedy|Crime|Drama|Thriller
4,1,608,3.701482,Fargo (1996),Comedy|Crime|Drama|Thriller
...,...,...,...,...,...
75028,9,3997,2.362120,Dungeons & Dragons (2000),Adventure|Fantasy
75029,9,3979,2.356187,Little Nicky (2000),Comedy
75030,9,2701,2.277037,Wild Wild West (1999),Action|Comedy|Sci-Fi|Western
75031,9,1556,2.227273,Speed 2: Cruise Control (1997),Action|Romance|Thriller
