In [1]:
import numpy as np
import pandas as pd

from UserSimilarity import COS
from UserSimilarity import PCC

from RatingMatrix import MovieLens_pivot
# MovieLens_pivot: rating이 많은 사용자 1,000명, 아이템 1,000개로 구성된 1000x1000 numpy array

In [2]:
def basic_baseline(data, sim, k):
    # initializing (1000, 1000) numpy array with zeros
    predicted_rating = np.zeros(data.shape)

    # calculating means
    mean = np.nanmean(np.where(data != 0, data, np.nan))  # the mean of all ratings
    mean_u = np.nanmean(np.where(data != 0, data, np.nan), axis=1)  # the mean of all users
    mean_i = np.nanmean(np.where(data != 0, data, np.nan), axis=0)  # the mean of all items

    # base user, item
    b_u = mean_u - mean # users' baseline
    b_i = mean_i - mean # items' baseline

    # selecting similarity function
    if sim == 'COS':
        sim = COS(data)
    elif sim == 'PCC':
        sim = PCC(data)

    # selecting top k users by sorting similarity array
    k_users = np.argsort(-sim)
    k_users = np.delete(k_users, np.s_[k:], 1)  # delete colomn under k

    # number of users with axis = 0 condition
    num_users = np.size(data, axis=0)
    num_items = np.size(data, axis=1)

    # calculating predicted ratings
    for u in range(0, num_users):
        list_sim = sim[u, k_users[u]]  # selecting top k similarity
        for i in range(0, num_items):
            list_rating = data[k_users[u], i].astype('float64')  # k users' ratings on item i

            b_ui = mean + b_u[u] + b_i[i]  # scalar
            # b_u[u]: user u의 baseline
            b_vi = mean + b_u[k_users[u]] + b_i[i]  # list
            # b_i[i]: item i의 baseline

            # calculation
            mom = np.sum(list_sim)  # 분모
            son = np.sum(list_sim * (list_rating - b_vi))  # 분자
            predicted_rating[u, i] = b_ui + son / mom

    return predicted_rating

In [3]:
# User-based & COS & k = 2
pd.DataFrame(basic_baseline(MovieLens_pivot, 'COS', 2))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,1.816553,1.816553,1.816553,-0.680315,-0.680315,1.317180,-0.680315,-0.680315,3.820311,4.319685,...,-0.680315,-0.680315,-0.680315,-0.680315,-0.680315,-0.680315,-0.680315,-0.680315,-0.680315,1.317180
1,3.421797,3.922534,3.421797,2.922534,3.423270,0.421797,0.925480,2.423270,3.922534,1.919588,...,0.925480,-0.576730,-0.576730,-0.576730,-0.576730,-0.576730,-0.576730,-0.576730,1.919588,-0.576730
2,4.586480,5.086484,5.086484,2.086467,3.586488,4.586480,2.086467,2.086467,5.086484,5.086484,...,2.086467,2.086467,0.086484,0.086484,0.086484,0.086484,0.086484,0.086484,0.086484,2.086501
3,1.135598,-0.864693,1.135598,-0.864693,-0.864693,-0.864693,-0.864693,1.135598,3.135162,3.635235,...,-0.864693,-0.864693,-0.864693,-0.864693,1.135598,-0.864693,-0.864693,-0.864693,-0.864693,-0.864693
4,3.128561,3.628865,1.127343,0.630082,0.630082,3.129169,0.630082,1.130387,3.129169,3.628865,...,-1.371135,-1.371135,-1.371135,-1.371135,-1.371135,-1.371135,-1.371135,-1.371135,-1.371135,0.627648
5,1.316994,0.817548,0.817548,0.817548,-0.181346,1.316994,0.817548,0.817548,3.819761,3.819761,...,-1.180239,-1.180239,-1.180239,-1.180239,-1.180239,-1.180239,-1.180239,-1.180239,-1.180239,-1.180239
6,3.690046,1.191169,3.690046,0.689148,1.188923,0.690945,1.188923,1.188923,3.189822,3.690046,...,-1.309954,-1.309954,-1.309954,0.689148,-1.309954,0.189372,-1.309954,-1.309954,-1.309954,0.690945
7,1.273142,1.273142,1.273142,0.774730,1.273142,1.273142,1.273142,-1.218916,3.781084,1.289027,...,-1.218916,-1.218916,-1.218916,-1.218916,-1.218916,0.276319,-1.218916,-1.218916,-1.218916,-1.218916
8,-1.654667,0.337459,0.337459,0.835491,-1.654667,-1.654667,-1.654667,-1.654667,2.847301,3.345333,...,-1.654667,-1.654667,-1.654667,-1.654667,-1.654667,-1.654667,-1.654667,-1.654667,-1.654667,-1.654667
9,1.394674,1.893996,1.893996,-0.602618,1.394674,1.394674,-0.602618,-0.602618,4.397382,4.397382,...,-0.602618,-0.602618,-0.602618,-0.602618,-0.602618,-0.602618,-0.602618,-0.602618,-0.602618,1.394674


In [4]:
# User-based & PCC & k = 2
pd.DataFrame(basic_baseline(MovieLens_pivot, 'PCC', 2))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,4.233158,3.739606,4.739606,3.739606,4.726711,3.752500,4.233158,3.739606,5.233158,5.233158,...,2.258947,0.233158,0.233158,2.258947,0.233158,1.246053,1.246053,2.258947,2.207370,2.258947
1,5.114904,2.661405,2.152105,4.114904,2.152105,4.605603,5.114904,4.624204,5.114904,2.568402,...,0.114904,1.096303,1.587003,0.114904,0.114904,0.114904,1.642804,0.114904,0.114904,0.114904
2,5.831504,2.775928,2.775928,4.345398,2.775928,5.345398,5.831504,4.831504,3.400974,5.345398,...,2.775928,1.859292,2.373186,0.831504,0.831504,2.289822,0.831504,0.831504,0.831504,0.831504
3,4.536255,4.536255,4.536255,3.536255,3.989836,1.804159,4.536255,3.989836,0.629094,3.536255,...,-0.463745,-0.463745,-0.463745,-0.463745,-0.463745,-0.463745,0.896997,1.350578,-0.463745,-0.463745
4,3.861668,3.861668,3.367079,3.861668,3.861668,3.356258,3.861668,3.861668,1.334615,1.334615,...,-1.138332,-1.138332,-1.138332,-1.138332,-1.138332,-1.138332,1.861668,0.883311,-1.138332,-1.138332
5,4.190832,3.704852,3.190832,3.190832,2.704852,4.162792,4.190832,3.704852,4.190832,-0.323188,...,-0.323188,-0.323188,-0.323188,-0.323188,-0.323188,-0.323188,1.218872,-0.323188,0.648772,-0.323188
6,3.066434,3.066434,1.568407,2.564461,3.066434,2.062487,-0.439486,2.050647,1.568407,3.058540,...,1.066434,-0.439486,-0.439486,-0.439486,0.556567,-0.439486,0.564461,-0.439486,1.568407,-0.439486
7,4.337014,1.861947,1.356960,3.337014,1.861947,4.337014,4.337014,3.842000,4.337014,4.337014,...,-0.662986,0.327041,0.822054,-0.662986,-0.662986,-0.662986,-0.662986,-0.662986,-0.662986,-0.662986
8,3.272006,3.272006,1.282107,1.777057,3.272006,2.787158,2.777057,1.282107,2.302310,3.282107,...,-0.222943,-0.222943,0.766956,-0.222943,0.272006,-0.222943,-0.222943,-0.222943,-0.222943,-0.222943
9,4.833342,2.866210,2.360732,4.338820,1.855254,5.338820,4.833342,4.338820,5.338820,2.811431,...,0.338820,1.327865,1.822387,0.338820,0.338820,0.338820,0.338820,0.338820,0.338820,0.338820


In [5]:
# Item-based & COS & k = 2
pd.DataFrame(basic_baseline(MovieLens_pivot.T, 'COS', 2))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,4.825905,3.321843,4.825905,4.825905,3.327260,1.831322,3.825905,3.825905,3.329968,4.825905,...,4.825905,4.825905,-0.174095,4.825905,3.823197,4.825905,1.831322,-0.174095,2.332677,4.324551
1,5.064456,5.064456,5.064456,5.064456,3.565575,2.068931,3.563338,4.064456,3.066694,4.563338,...,5.064456,5.064456,0.064456,5.064456,4.563338,5.064456,2.068931,2.068931,0.064456,4.062219
2,4.679494,2.181668,4.679494,2.181668,1.180798,1.178190,1.681233,4.179059,0.680364,4.179929,...,4.179929,2.181668,-0.320506,2.181668,2.181668,2.181668,1.178190,2.177320,-0.320506,2.181668
3,4.030906,3.531667,4.531160,3.030906,2.531160,3.531667,4.031413,1.030400,2.031413,4.531160,...,1.530146,1.530146,0.530653,1.532173,-0.468840,4.031413,1.532173,1.530146,1.532173,1.532173
4,4.627157,3.127538,4.627157,4.627157,3.627157,3.627157,3.127031,3.627157,4.127031,4.127031,...,4.627157,4.627157,-0.372843,4.627157,3.127284,4.627157,3.627157,1.627664,2.126524,3.127031
5,2.218833,4.221555,4.221555,2.218833,2.722235,-0.277765,1.719514,1.719514,3.224277,2.218833,...,4.722235,2.218833,-0.277765,2.218833,4.722235,2.218833,-0.277765,-0.277765,-0.277765,4.221555
6,3.379884,4.379884,4.879147,4.379884,4.879147,2.382831,2.879147,2.879147,2.378411,3.877674,...,3.379884,2.375463,-0.120853,-0.120853,1.882094,4.879147,2.382831,-0.120853,-0.120853,2.382831
7,3.980871,3.980871,4.481094,4.481094,2.481094,-0.518906,2.980871,2.480648,1.481094,3.980871,...,1.979979,4.481094,-0.518906,3.980871,1.979979,4.481094,-0.518906,0.981763,-0.518906,1.979979
8,2.512886,2.015062,4.513853,0.014095,1.514820,1.513370,0.014095,2.512886,2.515303,2.013128,...,4.514337,0.014095,0.014095,0.014095,2.515303,0.014095,1.513370,2.512886,0.014095,2.015062
9,2.473576,4.468727,4.468727,0.969939,1.965090,1.972364,1.471151,-0.032486,4.466302,2.473576,...,2.461452,-0.032486,-0.032486,-0.032486,4.466302,-0.032486,-0.032486,2.473576,-0.032486,4.468727


In [6]:
# Item-based & PCC & k = 2
pd.DataFrame(basic_baseline(MovieLens_pivot.T, 'PCC', 2))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,990,991,992,993,994,995,996,997,998,999
0,5.055508,2.529568,4.581448,5.055508,3.581448,3.581448,3.581448,4.055508,4.107388,4.581448,...,5.055508,4.581448,1.951747,4.581448,3.529568,4.581448,3.581448,0.055508,4.581448,4.055508
1,5.292949,4.379330,4.836140,5.292949,3.836140,3.836140,3.292949,4.292949,3.836140,4.292949,...,5.292949,4.836140,2.120188,4.836140,4.292949,4.836140,3.836140,2.465710,2.120188,3.749759
2,4.310875,3.804445,4.804445,3.804445,2.804445,4.804445,2.323734,4.298016,1.791586,4.298016,...,0.804445,0.804445,1.310875,2.285157,1.791586,0.804445,2.804445,1.310875,2.830164,0.804445
3,3.641821,4.126880,4.096997,3.096997,3.641821,1.641821,0.611938,3.582056,1.126880,4.126880,...,0.611938,1.641821,1.126880,0.611938,0.611938,0.611938,3.126880,0.611938,0.611938,0.611938
4,4.629891,3.181874,4.629891,4.629891,3.629891,3.629891,3.112563,3.629891,4.112563,4.112563,...,4.629891,4.629891,-0.370109,4.629891,3.147219,4.629891,3.629891,1.699202,2.043253,3.112563
5,2.194735,2.682987,2.697084,2.194735,1.692385,0.687686,1.692385,3.190036,1.682987,0.687686,...,2.697084,0.687686,0.687686,0.687686,0.687686,0.687686,0.687686,0.687686,0.687686,0.687686
6,3.397403,4.397403,4.878686,4.397403,4.878686,2.472267,2.878686,2.878686,2.359970,3.841254,...,3.397403,2.285106,-0.121314,-0.121314,1.953551,4.878686,2.472267,-0.121314,-0.121314,2.472267
7,4.135974,2.685145,5.234315,5.685145,3.586804,2.685145,4.234315,3.586804,0.685145,4.685145,...,2.783486,2.488463,0.685145,2.037633,1.135974,2.939292,3.234315,0.685145,0.685145,1.783486
8,4.643245,5.149232,4.647236,1.643245,2.141249,2.141249,3.647236,2.141249,2.141249,3.647236,...,3.137258,0.647236,2.655220,1.651228,0.647236,0.647236,0.647236,3.137258,0.647236,0.647236
9,2.655264,5.197876,5.197876,1.180831,0.689353,2.163787,3.197876,0.197876,4.706398,5.197876,...,0.197876,0.197876,0.197876,0.197876,4.197876,1.214920,0.197876,2.655264,0.197876,2.655264
