# Laboratorium 2 - collaborative filtering

## Przygotowanie

 * dataset i potrzebne biblioteki są dokładnie takie same jak na poprzednim laboratorium
 * pobierz i wypakuj dataset: https://files.grouplens.org/datasets/movielens/ml-latest-small.zip
   * więcej możesz poczytać tutaj: https://grouplens.org/datasets/movielens/
 * [opcjonalnie] Utwórz wirtualne środowisko
 `python3 -m venv ./recsyslab1`
 * zainstaluj potrzebne biblioteki:
 `pip install numpy pandas sklearn`

## Część 1. - przygotowanie danych

In [1]:
import math
import numpy as np
import pandas

from sklearn.model_selection import train_test_split, KFold

In [2]:
# liczba parametrow opisujacych filmy i uzytkownikow zalezy tylko od nas
K = 26

In [3]:
# wczytujemy oceny uytkownikow i od razu dzielimy je na dwa zbiory - treningowy i testowy

all_ratings = pandas.read_csv('ml-latest-small/ratings.csv').drop(columns=['timestamp'])
train_ratings_set, test_ratings_set = train_test_split(all_ratings, test_size=0.05)
train_ratings_set

Unnamed: 0,userId,movieId,rating
67551,437,1094,3.0
100373,610,68237,4.0
12042,74,1209,4.0
9122,62,139385,4.0
5572,40,121,4.0
...,...,...,...
72472,469,208,3.0
45354,298,97306,3.0
31033,217,870,1.0
50299,325,1614,2.0


In [4]:
# inicjalizujemy macierz preferencji uzytkownikow liczbami losowymi z przedzialu [0.0, 5.0]

def initialize_users(raw_ratings, k):
    users_no = raw_ratings['userId'].unique().size
    users = pandas.DataFrame(5.0 * np.random.uniform(size=(users_no, k)), index=raw_ratings['userId'].unique(), columns=['x%s' % i for i in range(k)])
    users.sort_index(inplace=True) 
    return users_no, users

users_no, users = initialize_users(train_ratings_set, K)
users

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x16,x17,x18,x19,x20,x21,x22,x23,x24,x25
1,0.977238,1.171817,4.375581,4.735521,2.925489,0.618315,1.842287,3.744789,4.918046,0.356759,...,4.347935,4.975207,4.427167,0.039181,4.984952,0.655751,0.449873,0.850748,3.867756,3.845123
2,2.397437,4.169148,4.689010,4.450612,1.419499,3.614266,1.341662,0.982680,1.527047,0.220406,...,1.113924,4.728121,3.837206,1.558734,3.261468,4.123728,0.142307,2.693445,4.518493,3.443164
3,0.512044,0.444823,3.376152,4.729659,1.022116,3.306682,0.522049,1.716704,0.474163,4.810117,...,3.196272,0.753754,3.122950,1.937346,3.757376,3.815225,1.179689,4.040862,3.651828,2.304868
4,1.422383,0.449519,3.221707,0.639061,2.483581,3.325977,4.777224,2.828473,2.731050,3.068294,...,0.262207,0.095989,2.388447,1.697233,4.881910,3.246280,0.750957,4.339303,4.668940,0.816229
5,3.197601,1.231578,1.237875,2.310584,0.873039,0.132871,1.081568,4.409627,4.104470,1.757760,...,4.305880,3.370484,1.119115,4.810419,2.784670,0.252680,0.527017,3.976891,4.972676,4.101408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,4.715144,4.418659,2.498431,1.353293,4.634113,1.885372,2.828988,3.254535,3.548495,4.811002,...,2.516574,3.215964,2.477345,3.638872,3.350286,0.481718,3.721710,2.898063,0.214363,2.271592
607,1.990104,3.248731,0.427185,3.875433,1.252650,3.207716,3.857395,1.766182,1.922393,1.010222,...,1.300349,0.634900,4.734029,0.581876,3.487267,1.049709,4.723724,1.724935,3.331618,4.056539
608,0.651605,3.527626,4.967762,4.982255,4.118903,2.426284,0.598209,1.826339,3.559847,0.250163,...,2.092778,2.548609,1.717774,0.723129,4.958602,4.574104,0.782509,4.320707,1.061613,4.200394
609,1.023762,1.449987,4.943478,0.156994,0.997076,3.823260,1.807928,4.317298,0.994449,2.664165,...,0.468156,4.179452,0.658839,1.543444,4.524253,2.116488,0.503615,4.942196,4.159179,3.088131


In [5]:
# inicjalizujemy macierz cech filmow liczbami losowymi z przedzialu [0.0, 1.0]

def initialize_movies(raw_ratings, k):
    movies_no = raw_ratings['movieId'].unique().size
    movies = pandas.DataFrame((1/K)* np.random.uniform(size=(movies_no, k)), index=raw_ratings['movieId'].unique(), columns=['x%s' % i for i in range(k)])
    movies.sort_index(inplace=True) 
    return movies_no, movies

movies_no, movies = initialize_movies(train_ratings_set, K)
movies

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,...,x16,x17,x18,x19,x20,x21,x22,x23,x24,x25
1,0.001039,0.007907,0.028489,0.025278,0.000016,0.036204,0.002198,0.010071,0.003576,0.004394,...,0.028175,0.030934,0.032125,0.012925,0.031076,0.015197,0.022402,0.004921,0.013155,0.011033
2,0.009400,0.033527,0.022849,0.021345,0.024841,0.020432,0.011585,0.009997,0.023932,0.002567,...,0.005328,0.013432,0.030488,0.010037,0.027426,0.026782,0.008550,0.014840,0.017139,0.007391
3,0.023792,0.008756,0.000482,0.000940,0.005189,0.036658,0.000033,0.025076,0.009772,0.021613,...,0.020279,0.020346,0.020283,0.013046,0.007397,0.013106,0.001817,0.024302,0.008251,0.022257
4,0.009499,0.017652,0.034364,0.022369,0.027518,0.011589,0.018432,0.016918,0.009793,0.034382,...,0.012593,0.027152,0.016603,0.026008,0.007030,0.015223,0.019109,0.036867,0.035828,0.023068
5,0.007222,0.001427,0.030314,0.022644,0.007053,0.012385,0.011565,0.034968,0.033604,0.022821,...,0.000828,0.012221,0.004732,0.035984,0.029742,0.026179,0.002496,0.027253,0.036609,0.005519
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.007523,0.026395,0.032456,0.013414,0.032284,0.035308,0.037548,0.030262,0.036038,0.002740,...,0.008575,0.004900,0.022393,0.000021,0.022118,0.035470,0.034092,0.034574,0.030383,0.035984
193583,0.002601,0.020643,0.035943,0.016960,0.018818,0.006942,0.002636,0.019307,0.036057,0.034850,...,0.006161,0.036821,0.016429,0.030767,0.025984,0.033843,0.015132,0.026886,0.029771,0.002562
193585,0.036672,0.005690,0.008583,0.017295,0.036422,0.023953,0.004819,0.004628,0.001702,0.029736,...,0.003332,0.014563,0.005064,0.011855,0.002295,0.035735,0.020924,0.027202,0.009774,0.010083
193587,0.004751,0.008032,0.003644,0.005136,0.024676,0.011692,0.007459,0.029630,0.031351,0.037859,...,0.028233,0.017748,0.026847,0.014488,0.015809,0.018675,0.016395,0.009133,0.022280,0.012167


In [6]:
# za pomoca sprytnej sztuczki przeksztalcamy oceny z formatu dostarczonego przez MovieLens do uzytecznej macierzy
# zwroc uwage na to, ze czesci filmow i uzytkownikow moze brakowac po podziale datasetu na dwie czesci
#   - byc moze warto uzupelnic brakujace kolumny i wiersze

def get_ratings(raw_ratings, movies, nan=False):
    ratings = raw_ratings.pivot(*raw_ratings.columns)
    if not nan:
        ratings = ratings.fillna(0.0)
    return ratings

ratings = get_ratings(train_ratings_set, movies)
ratings

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,2.5,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
607,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
608,2.5,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
609,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Część 2. - trening modelu

In [7]:
# trenujemy model iteracyjnie, wykorzystujac gradient descent

alpha = 0.00008 # learning speed
delta = 100 # minimal upgrade for each step
lambd = 0.05 # regularization weight

def calculate_user_preferences(users, movies, ratings, raw_ratings, users_no, movies_no, alpha, delta, lambd):
    total_error = 0.0
    users_model = users.copy()
    movies_model = movies.copy()
    
    while(True):
        previous_total_error = total_error

        predicted_ratings = np.dot(users_model, movies_model.T)
        errors = np.where(ratings==0.0, pandas.DataFrame(np.zeros((users_no, movies_no))), predicted_ratings - ratings)
        users_gradient = np.dot(errors, movies_model)
        movies_gradient = np.dot(errors.T, users_model)
        
        # zauwaz, ze nie uzywamy biasow i nie potrzebujemy dodatkowej macierzy do regularyzacji
        #  - wystarczy, ze uzyjemy odpowiednio macierzy users_model i movies_model
        # musimy zaktualizowac dwa modele
        
        users_model -= alpha * (users_gradient + lambd * users_model)
        movies_model -= alpha * (movies_gradient + lambd * movies_model )

        total_error = np.sum(errors ** 2)
        print(total_error)
        progress = abs(previous_total_error - total_error)
        if progress < delta:
            break
            
    return users_model, movies_model

users_model, movies_model = calculate_user_preferences(users, movies, ratings, train_ratings_set, users_no, movies_no, alpha, delta, lambd)

591360.4005260952
486834.41672325856
1424526.7284471653
8999890.084882252
71002261.68710157
484389942.48566717
329273566.8277704
259841285.03123987
1399209.6828458847
1294904.2930737045
1222671.5097789965
1169464.0558825277
1128247.1309660387
1094945.05930573
1067059.743094786
1042990.8039467772
1021676.4618243083
1002392.2797531188
984632.6780811878
968038.2395950279
952349.5356642631
937376.9292494918
922980.3346057803
909055.3655967911
895523.6868848895
882326.1895292456
869418.0992236797
856765.4274084747
844342.3677871359
832129.3660540425
820111.6738987972
808278.2546501372
796620.946550324
785133.8164846402
773812.6558237543
762654.5833673689
751657.7298789246
740820.985510958
730143.7963230413
719625.9996354606
709267.6905265725
699069.1136433176
689030.5758479219
679152.3762102356
669434.7505760529
659877.8284711041
650481.6004921478
641245.8946297272
632170.360190381
623254.4581599544
614497.4569891009
605898.4328974703
597456.273892031
589169.6867822941
581037.20655441
57305

59785.18886423939
59628.51409856276
59472.690616107284
59317.71260148788
59163.57428777547
59010.26995599949
58857.79393465587
58706.14059922231
58555.30437167912
58405.27972003636
58256.061157867734
58107.64324384943
57960.020581305325
57813.18781775824
57667.139644486575
57521.87079608639
57377.37605003948
57233.650226286176
57090.68818680449
56948.48483519341
56807.035116261955
56666.33401562373
56526.37655929534
56387.15781330081
56248.672883280284
56110.91691410404
55973.88508949034
55837.57263162847
55701.97480080692
55567.086895044806
55432.90424972867
55299.42223725298
55166.63626666565
55034.54178331732
54903.134268514754
54772.4092391783
54642.3622475042
54512.98888062945
54384.284760301714
54256.24554255311
54128.86691737685
54002.14460840843
53876.074372610376
53750.651999960726
53625.87331314444
53501.73416724949
53378.23044946516
53255.35807878479
53133.1130057114
53011.49121196673
52890.48871020391
52770.101543722645
52650.32578618804
52531.15754135297
52412.59294278289


## Część 3. - podobieństwo elementów

In [8]:
# przygotujmy funkcje obliczajaca odleglosc cosinusowa miedzy kazda para elementow (filmow lub uzytkownikow)

def cosine_similarity(vectors):
    # przydadza nam sie dlugosci wektorow
    # poniewaz w kolejnej czesci bedziemy korzystac z masked arrays, nie mozemy uzyc najprostszej metody
    # musimy zaimplementowac to sami
    lengths = np.sqrt(np.sum(vectors ** 2, axis=1))
    # podobienstwo liczymy w dwoch krokach - najpierw liczymy iloczyn skalarny kazdej pary wektorow
    dot_products = vectors.dot(vectors.T)
    # nastepnie dzielimy zarowno wiersze jak i kolumny przez dlugosci wektorow - przyda sie zmienna lengths oraz funkcja divide()
    similarity = (dot_products / lengths).T / (lengths).T

    return similarity#pandas.DataFrame(np.nan_to_num(similarity))

cosine_similarity(movies_model)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
1,1.000000,0.952502,0.666008,0.290454,0.683396,0.912038,0.672210,0.105879,0.455295,0.938123,...,0.364011,0.316189,0.288810,0.283780,0.397300,0.277878,0.283749,0.208972,0.301970,0.025156
2,0.952502,1.000000,0.663771,0.264357,0.709747,0.879365,0.604240,0.017498,0.494816,0.893400,...,0.392372,0.352506,0.324957,0.330646,0.395928,0.321074,0.284905,0.229546,0.350133,0.039097
3,0.666008,0.663771,1.000000,0.235834,0.564759,0.575587,0.467000,0.254309,0.617336,0.617393,...,0.320597,0.318313,0.256361,0.268639,0.336563,0.247455,0.255842,0.210624,0.266799,0.135820
4,0.290454,0.264357,0.235834,1.000000,0.267123,0.327058,0.286469,0.525509,0.351576,0.272423,...,0.816203,0.739444,0.747081,0.740809,0.766175,0.732496,0.785050,0.772131,0.737011,0.400568
5,0.683396,0.709747,0.564759,0.267123,1.000000,0.786195,0.711702,0.160839,0.616950,0.699879,...,0.325677,0.310289,0.260490,0.265523,0.353889,0.300758,0.277944,0.209899,0.310780,0.048794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193581,0.277878,0.321074,0.247455,0.732496,0.300758,0.304188,0.078247,0.278219,0.294759,0.331123,...,0.939250,0.938259,0.964073,0.934053,0.953416,1.000000,0.935666,0.928810,0.943292,0.505373
193583,0.283749,0.284905,0.255842,0.785050,0.277944,0.280905,0.126043,0.390442,0.272170,0.315506,...,0.942902,0.927962,0.922408,0.930302,0.949390,0.935666,1.000000,0.927571,0.961145,0.435550
193585,0.208972,0.229546,0.210624,0.772131,0.209899,0.220397,0.099953,0.351367,0.260413,0.209872,...,0.929464,0.927145,0.956659,0.944132,0.943338,0.928810,0.927571,1.000000,0.926463,0.440558
193587,0.301970,0.350133,0.266799,0.737011,0.310780,0.318578,0.134191,0.384204,0.326258,0.331294,...,0.952453,0.937030,0.953199,0.952230,0.956064,0.943292,0.961145,0.926463,1.000000,0.424071


In [9]:
# teraz mozemy znalexc k elementow najbardziej podobnych do danego

def k_most_similar(vectors, i, k):
    sim_matrix = cosine_similarity(vectors)
    return np.argsort(sim_matrix[i])[-k:]

k_most_similar(movies, 193587, 8)

193571    8179
193573    3099
193579    7104
193581    9314
193583    5247
193585    2380
193587    2938
193609    9532
Name: 193587, dtype: int64

## Część 4. - Item2Item collaborative filtering

In [10]:
# sprobujmy innego podejscia - Item2Item CF przewiduje rating tylko na podstawie macierzy ratingow, bez koniecznosci trenowania
#   dodatkowych macierzy

# zauwaz, ze nie chcemy przeprowadzac obliczen tam, gdzie brakuje nam elementow
#   - oblicz macierz ratings z parametrem nan=True oraz wykorzystaj tzw. masked arrays: np.ma.array(x, mask=np.isnan(x))
#   w ten sposob unikniesz przeprowadzania niepotrzebnych obliczen

def item_to_item(ratings):
    similarity = cosine_similarity(ratings.T) # prawdopodobnie bedziesz musial zmodyfikowac te funkcje, by obslugiwala NaN
    sums = similarity.sum(axis=1)
    model = ratings @ similarity  / sums# srednia ocen wystawionych przez uzytkownika wazona podobienstwem elementow
    return model * 5 #pandas.DataFrame(np.nan_to_num(model))
model = item_to_item(ratings)

In [11]:
model

movieId,1,2,3,4,5,6,7,8,9,10,...,193565,193567,193571,193573,193579,193581,193583,193585,193587,193609
userId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1.478179,1.288708,1.359496,1.226355,1.186815,1.465100,1.113405,0.943973,1.165456,1.547580,...,0.033235,0.033235,0.033235,0.033235,0.033235,0.033235,0.033235,0.033235,0.033235,0.768191
2,0.125941,0.118998,0.072437,0.044473,0.101857,0.109970,0.058550,0.075006,0.042909,0.111254,...,0.266695,0.266695,0.266695,0.266695,0.266695,0.266695,0.266695,0.266695,0.266695,1.136075
3,0.051245,0.048424,0.053669,0.041422,0.042033,0.058762,0.039665,0.033183,0.065373,0.062436,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,0.858488,0.671828,0.705518,1.391461,0.721161,0.803584,0.806757,0.517902,0.559046,0.778464,...,0.044375,0.044375,0.044375,0.044375,0.044375,0.044375,0.044375,0.044375,0.044375,0.701785
5,0.298135,0.283379,0.264191,1.148249,0.343739,0.271051,0.331711,0.202052,0.261134,0.345065,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.248408
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
606,3.457960,2.780642,2.706345,3.852857,2.837555,3.037930,4.254411,2.095883,1.897695,2.988703,...,0.295077,0.295077,0.295077,0.295077,0.295077,0.295077,0.295077,0.295077,0.295077,4.566042
607,1.021012,0.875949,0.930456,1.351934,0.817483,0.945640,0.803247,0.619679,0.908160,1.066485,...,0.019941,0.019941,0.019941,0.019941,0.019941,0.019941,0.019941,0.019941,0.019941,0.320774
608,3.260427,3.080690,3.240695,2.417777,2.849020,3.183611,2.581697,2.504257,2.648691,3.815777,...,0.148001,0.148001,0.148001,0.148001,0.148001,0.148001,0.148001,0.148001,0.148001,7.379583
609,0.193076,0.180268,0.185901,0.501623,0.207705,0.173439,0.203102,0.131923,0.269706,0.268224,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.124156


## Część 5. - porównanie algorytmów

In [12]:
# korzystając z funkcji z poprzedniego laboratorium, porownaj dwa zaimplementowane algorytmy Collaborative Filtering
positive_threshold = 4.0
negative_threshold = 2.0

def calculate_stats(test_ratings_set, predicted_ratings, positive_threshold, negative_threshold):
    true_positives = 0
    true_negatives = 0 
    false_positives = 0 
    false_negatives = 0
    for index, row in test_ratings_set.iterrows():
        userId, movieId, rating = row['userId'], row['movieId'], row['rating']
        try:
            predicted_rating = predicted_ratings[int(movieId)][int(userId)]
        except:
            continue
        is_prediction_positive = predicted_rating >= positive_threshold
        is_true_value_positive = rating >= positive_threshold
        is_prediction_negative = predicted_rating <= negative_threshold
        is_true_value_negative = rating <= negative_threshold
        if is_prediction_positive:
            if is_true_value_positive:
                true_positives += 1
            elif is_true_value_negative: 
                false_positives += 1
        elif is_prediction_negative:
            if is_true_value_negative:
                true_negatives += 1
            elif is_true_value_positive: 
                false_negatives += 1
        
    recall = true_positives / (true_positives + false_negatives)
    precision = true_positives / (true_positives + false_positives)
    f1 =  2 * ((precision * recall) / (precision + recall))
    accuracy = (true_positives + true_negatives) / (true_positives + false_positives + false_negatives + true_negatives)
    
    return {
        'true_positives': true_positives,
        'true_negatives': true_negatives,
        'false_positives': false_positives,
        'false_negatives': false_negatives,
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

calculate_stats(test_ratings_set, model, positive_threshold, negative_threshold)

{'true_positives': 190,
 'true_negatives': 411,
 'false_positives': 98,
 'false_negatives': 1694,
 'accuracy': 0.2511491851232762,
 'precision': 0.6597222222222222,
 'recall': 0.10084925690021232,
 'f1': 0.17495395948434622}

In [13]:
predicted_ratings = cosine_similarity(movies_model).dot(movies).dot(users.T).T
calculate_stats(test_ratings_set, predicted_ratings, positive_threshold, negative_threshold)

{'true_positives': 2352,
 'true_negatives': 0,
 'false_positives': 670,
 'false_negatives': 0,
 'accuracy': 0.7782925215089345,
 'precision': 0.7782925215089345,
 'recall': 1.0,
 'f1': 0.8753256419799033}