In [16]:
import scipy.stats as sps
from scipy.interpolate import interp1d
import numpy as np

import pandas as pd
import math
import random
import sklearn

from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse.linalg import svds
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import scipy
from tqdm import tqdm

from libmf import mf

In [9]:
def usefulness(c, w, noice):
    return np.arctan(c - w + noice) / np.pi + 0.5

In [10]:
customer_distribution = sps.norm(0.6, 0.2)
w_distribution = sps.norm(0, 0.4)

epsilon = sps.norm(0, 0.05)

In [11]:
def interpol_distribution(sample):
    hst = np.histogram(sample, density=True, bins=200)
    return interp1d(hst[1][:-1], hst[0], kind='linear',
                               fill_value=0.0, bounds_error=False)

In [41]:
def zero_step(model: CMFRecommender, user_info, item_info, topn=52):
    new_feedback = []

    maximal_user, maximal_item = model.get_max_index()
    old_users = model.get_users().set_index("UserId")
    
    for i in range(topn):
        user_id = np.random.choice(round(maximal_user) - 1)
        for index, item_row in item_info.iterrows():
            deal = sps.bernoulli.rvs(usefulness(old_users.loc[user_id], item_row["F"], epsilon.rvs()))  # моделируем сделки
            new_feedback.append((user_id, item_row["ItemId"], deal))
    
    for index, user_row in user_info.iterrows():
        w_offered = model.recommend_items_cold(user_row["F"], topn)["ItemId"]
        for w in w_offered:
            deal = sps.bernoulli.rvs(usefulness(user_row["F"], w, epsilon.rvs()))  # моделируем сделки
            new_feedback.append((user_row["UserId"], w, deal))
    
    model.retrain(new_feedback, user_info, item_info)
    return model, new_feedback


NameError: name 'CMFRecommender' is not defined

In [48]:
import numpy as np
import pandas as pd
from cmfrec import CMF_implicit



class CMFRecommender:
    def __init__(self, num_of_factors=40):
        self.model = CMF_implicit(k=num_of_factors)
        self.ratings = None
        self.trained = False
        self.user_info = None
        self.item_info = None

    def get_users(self):
        return self.user_info

    def fit(self, ratings, user_info, item_info):
        self.trained = True
        self.ratings = ratings
        self.user_info = user_info
        self.item_info = item_info
        self.model.fit(X=ratings, U=user_info, I=item_info)

    def get_interacted_items(self, user_id):
        return self.ratings.loc[self.ratings.UserId == user_id]['ItemId'].unique()

    def get_unqiue_item_count(self):
        return len(self.ratings["ItemId"].unique())

    def recommend_items_new(self, user_id, I, topn):
        recommended_items = self.model.topN_new(user=user_id, n=topn, output_score=True)[:2]
        return pd.DataFrame({"ItemId": recommended_items[0], "Rating": recommended_items[1]})

    def recommend_items_cold(self, user_row, topn=10):
        n = min(topn, self.get_unqiue_item_count())
        recommended_items = self.model.topN_cold(U=user_row, n=n, output_score=True)[:2]
        return pd.DataFrame({"ItemId": recommended_items[0], "Rating": recommended_items[1]})

    def get_max_index(self):
        return self.ratings["UserId"].max(), self.ratings["ItemId"].max()

    def recommend_items(self, user_id=None, topn=10, exclude_rated=True):
        items_to_ignore = []
        if exclude_rated:
            items_to_ignore.extend(self.get_interacted_items(user_id))
        n = min(topn, self.get_unqiue_item_count() - len(items_to_ignore))

        recommended_items = self.model.topN(user=user_id, n=n, output_score=True)[:2]

        return pd.DataFrame({"ItemId": recommended_items[0], "Rating": recommended_items[1]})

    def retrain(self, new_ratings, new_users, new_items):
        number_of_new_ratings = len(new_ratings)
        new_ratings = pd.DataFrame(new_ratings, columns =['UserId', 'ItemId', 'Rating'])
        if self.ratings is None:
            self.ratings = new_ratings
        else:
            self.ratings = pd.concat([self.ratings.loc[number_of_new_ratings:], new_ratings], ignore_index=True)
        self.user_info = pd.concat([self.user_info, new_users])
        self.item_info = pd.concat([self.item_info, new_items])

        self.fit(ratings=self.ratings, user_info=self.user_info, item_info=self.item_info)

In [62]:
def dynamic_system_iterate(model: CMFRecommender, customer_distribution, w_distribution, c_size=10, w_size=10, num_of_steps=5,
                           topn=5, delta=1):
    
    maximal_user, maximal_item = model.get_max_index()
    user_info = pd.DataFrame({"F": customer_distribution.rvs(size=c_size)})  # size = (c_size, c_feature_size) в многомерном случае 
    user_info["UserId"] = np.arange(maximal_user + 1, maximal_user + 1  + c_size)

    item_info = pd.DataFrame({"F": w_distribution.rvs(size=w_size)})  # size = (w_size, w_feature_size) в многомерном случае 
    item_info["ItemId"] = np.arange(maximal_item + 1, maximal_item + 1 + w_size)
    model, new_feedback = zero_step(model, user_info, item_info, topn=topn)
    
    for step in range(1, num_of_steps + 1):
        for index, user_row in user_info.iterrows():
            items_interacted = model.get_interacted_items(user_row["UserId"])
            w_offered = model.recommend_items(user_row["UserId"], topn=topn)["ItemId"]
            # w_offered = model.recommend_items(user_row["UserId"], topn=topn, include=np.setdiff1d(item_info["ItemId"], items_interacted))["ItemId"]
            for w in w_offered:
                deal = sps.bernoulli.rvs(usefulness(user_row["F"], w, epsilon.rvs()))  # моделируем сделки
                new_feedback.append((user_row["UserId"], w, deal))
        model.retrain(new_feedback, pd.DataFrame(), pd.DataFrame())
    
    
    # смена распределения
    new_feedback_df = pd.DataFrame(new_feedback, columns=['UserId', 'ItemId', 'Feedback'])
    grouped_users = new_feedback_df.groupby('UserId')['Feedback'].mean().reset_index()
    # probabilities_users = grouped_users['Feedback'] / grouped_users['Feedback'].sum()
    user_info.set_index('UserId', inplace=True)
    customer_distribution = sps.gaussian_kde(user_info.loc[grouped_users['UserId']], grouped_users['Feedback'])
    grouped_items = new_feedback_df.groupby('ItemId')['Feedback'].mean().reset_index()
    # probabilities_items = grouped_items['ItemId'] / grouped_items['ItemId'].sum()
    grouped_items['Feedback'] += delta
    item_info.set_index('ItemId', inplace=True)
    w_distribution = sps.gaussian_kde(item_info.loc[grouped_users['ItemId']], grouped_items['Feedback'])

    return customer_distribution, w_distribution, new_feedback, model


In [60]:
user_info = pd.DataFrame({"F": customer_distribution.rvs(size=100)}) # генерим датасет для нулевой итерации
user_info["UserId"] = np.arange(100)

item_info = pd.DataFrame({"F": w_distribution.rvs(size=100)})
item_info["ItemId"] = np.arange(100)
feedback = []

for i, user_row in user_info.iterrows():
    for j, item_row in item_info.iterrows():
        deal = sps.bernoulli.rvs(usefulness(user_row["F"], item_row["F"], epsilon.rvs()))
        feedback.append((user_row["UserId"], item_row["ItemId"], deal))

model = CMFRecommender()            
model.retrain(feedback, user_info, item_info)

In [61]:
dynamic_system_iterate(model, customer_distribution, w_distribution)

ValueError: Number of dimensions is greater than number of samples. This results in a singular data covariance matrix, which cannot be treated using the algorithms implemented in `gaussian_kde`. Note that `gaussian_kde` interprets each *column* of `dataset` to be a point; consider transposing the input to `dataset`.