In [1]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell executes so edits to project code are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Step up one directory; the cell output shows the resulting working directory.
# NOTE(review): presumably done so the `dotify` package is importable — confirm.
# This cell is not idempotent: re-running it climbs another level.
cd ..

/Users/willwolf/Documents/dotify


In [19]:
import numpy as np
import pandas as pd
from sqlalchemy import create_engine

from dotify.config import DevelopmentConfig

In [39]:
# SQLAlchemy engine built from the development config's database URI.
ENGINE = create_engine(DevelopmentConfig.SQLALCHEMY_DATABASE_URI)
# One row per (country_id, song_id) pair: the reciprocal of that pair's summed
# rank, so a smaller rank total yields a larger normalized_rank.
TOP_SONGS_QUERY = """
    SELECT 
        country_id, 
        song_id, 
        1. / SUM(rank) as normalized_rank
    FROM top_songs
    GROUP BY 1, 2"""

In [40]:
# Run the aggregation query and load its result set into a DataFrame.
top_songs_df = pd.read_sql(sql=TOP_SONGS_QUERY, con=ENGINE)

### Implicit matrix factorization (MF)

In [43]:
# Reshape the long (country_id, song_id, normalized_rank) rows into a wide
# country-by-song matrix; pairs absent from the query result become 0.
# Keyword arguments are required: DataFrame.pivot is keyword-only in pandas >= 2.0.
R_ui = top_songs_df.pivot(
    index='country_id',
    columns='song_id',
    values='normalized_rank',
).fillna(0)

In [118]:
# Default scale applied to R_ui when ImplicitMF builds its confidence matrix (1 + ALPHA * R_ui).
ALPHA = 40
# Default number of latent factors (columns) in the user and item vector tables.
F = 30
# Default weight on the identity matrix added before solving each least-squares update.
LAMBDA = 1

In [247]:
class UserVectors:
    """Randomly initialised latent-factor table for the rows of ``R_ui``.

    ``vectors`` is a DataFrame with one row per label in ``R_ui.index`` and
    ``f`` columns labelled ``0 .. f-1``, filled with standard-normal draws.
    """

    def __init__(self, R_ui, f=F):
        row_count = R_ui.shape[0]
        initial_factors = np.random.randn(row_count, f)
        self.vectors = pd.DataFrame(initial_factors, index=R_ui.index, columns=range(f))
        
class ItemVectors:
    """Randomly initialised latent-factor table for the columns of ``R_ui``.

    ``vectors`` is a DataFrame with one row per label in ``R_ui.columns`` and
    ``f`` columns labelled ``0 .. f-1``, filled with standard-normal draws.
    """

    def __init__(self, R_ui, f=F):
        column_count = R_ui.shape[1]
        initial_factors = np.random.randn(column_count, f)
        self.vectors = pd.DataFrame(initial_factors, index=R_ui.columns, columns=range(f))

In [271]:
class ImplicitMF:
    """Implicit-feedback matrix factorization fitted by alternating least squares.

    From the interaction matrix ``R_ui`` this derives a binary preference
    matrix ``P_ui = R_ui > 0`` and a confidence matrix
    ``C_ui = 1 + alpha * R_ui``. It then alternately re-solves every user
    (row) vector and every item (column) vector with the other side fixed:

        x_u = (Y^T C_u Y + lmbda * I)^-1  Y^T C_u p_u

    Parameters
    ----------
    user_vectors_class, item_vectors_class : callable
        Called with ``R_ui``. Each must return an object whose ``.vectors``
        attribute is a DataFrame of latent factors, indexed by ``R_ui.index``
        and ``R_ui.columns`` respectively.
    R_ui : pandas.DataFrame
        Interaction matrix (rows = users, columns = items). The default is the
        notebook-level ``R_ui`` as it existed when this class was defined.
    alpha : float
        Scale applied to ``R_ui`` when building the confidence matrix.
    lmbda : float
        Weight of the identity matrix added to each linear system.
    n_iterations : int
        Number of full user-then-item sweeps performed by ``run``.
    """

    def __init__(self, user_vectors_class, item_vectors_class, R_ui=R_ui, alpha=ALPHA, lmbda=LAMBDA,
                 n_iterations=10):
        self.user_vectors = user_vectors_class(R_ui)
        self.item_vectors = item_vectors_class(R_ui)
        self.P_ui = R_ui > 0
        # Use the constructor arguments; the module-level ALPHA / LAMBDA are
        # only the defaults and must not override what the caller passed.
        self.C_ui = 1 + alpha * R_ui
        self.lmbda = lmbda
        self.n_iterations = n_iterations

    def run(self):
        """Perform ``n_iterations`` sweeps, printing the sweep index each time."""
        for i in range(self.n_iterations):
            print(i)
            self._update_user_vectors()
            self._update_item_vectors()

    def _update_user_vectors(self):
        """Re-solve every user vector with the item vectors held fixed."""
        item_factors = self.item_vectors.vectors
        # Y^T Y is identical for every user, so compute it once per sweep.
        YtY = self._compute_ZtZ(item_factors)
        for user in self.user_vectors.vectors.index:
            # Row of confidences / preferences for this user (label lookup;
            # `.ix` no longer exists in pandas >= 1.0).
            Cu = self.C_ui.loc[user]
            Pu = self.P_ui.loc[user]
            YtCuY = self._compute_ZtCuZ(YtY, item_factors, Cu)
            Xu = self._compute_updated_record(item_factors, YtCuY, Cu, Pu)

            self.user_vectors.vectors.loc[user] = Xu

    def _update_item_vectors(self):
        """Re-solve every item vector with the user vectors held fixed."""
        user_factors = self.user_vectors.vectors
        # X^T X is identical for every item, so compute it once per sweep.
        XtX = self._compute_ZtZ(user_factors)
        for item in self.item_vectors.vectors.index:
            # Column of confidences / preferences for this item.
            Ci = self.C_ui[item]
            Pi = self.P_ui[item]
            XtCiX = self._compute_ZtCuZ(XtX, user_factors, Ci)
            Yi = self._compute_updated_record(user_factors, XtCiX, Ci, Pi)

            self.item_vectors.vectors.loc[item] = Yi

    def _compute_ZtCuZ(self, ZtZ, vectors, Cu):
        """Return ``Z^T C Z`` computed as ``Z^T Z + Z^T (C - I) Z``.

        ``Cu`` may be a 1-D vector of confidences or the equivalent 2-D
        diagonal matrix. The diagonal is applied by broadcasting, so no dense
        n-by-n matrix is formed or multiplied.
        """
        weights = self._confidence_weights(Cu)
        Z = np.asarray(vectors, dtype=float)
        # Z.T has shape (f, n); multiplying by an (n,) vector scales column j
        # of Z.T (i.e. row j of Z) by weights[j] - 1.
        return ZtZ + np.dot(Z.T * (weights - 1), Z)

    def _compute_updated_record(self, vectors, ZtCuZ, Cu, Pu):
        """Solve ``(Z^T C Z + lmbda * I) x = Z^T C p`` and return ``x``.

        ``Cu`` may be a 1-D vector of confidences or the equivalent 2-D
        diagonal matrix; ``Pu`` is the matching preference vector.
        """
        weights = self._confidence_weights(Cu)
        Z = np.asarray(vectors, dtype=float)
        I = self._compute_I(len(ZtCuZ))
        rhs = np.dot(Z.T, weights * np.asarray(Pu, dtype=float))
        # Solving the system directly is cheaper and numerically more stable
        # than forming the explicit inverse and multiplying by it.
        return np.linalg.solve(ZtCuZ + self.lmbda * I, rhs)

    @staticmethod
    def _confidence_weights(Cu):
        """Return confidences as a 1-D float array (accepts 1-D or 2-D diagonal input)."""
        Cu = np.asarray(Cu, dtype=float)
        # np.diag on a 2-D array extracts its main diagonal.
        return np.diag(Cu) if Cu.ndim == 2 else Cu

    @staticmethod
    def _compute_ZtZ(vectors):
        """Return the Gram matrix ``Z^T Z`` of the factor table."""
        return np.dot(vectors.T, vectors)

    @staticmethod
    def _compute_I(size):
        """Return a ``size`` x ``size`` identity matrix."""
        return np.eye(size)

In [272]:
# Build the model with the default interaction matrix and hyperparameters.
implicit_mf = ImplicitMF(
    user_vectors_class=UserVectors,
    item_vectors_class=ItemVectors,
)

In [273]:
# Fit the model: each sweep prints its index, then updates user and item vectors.
implicit_mf.run()

0
1
2
3
4
5
6
7
8
9
