In [1]:
import numpy as np
import pandas as pd

In [24]:
import jdc

In [6]:
data = pd.read_csv("C:/Users/HOME/문서/한양대/3-2/산업공학연구실현장실습2/datas/data_new.csv",index_col=0)
interactions_df = data[['AUTH_CUSTOMER_ID',"PRODUCT_CODE"]]

Index the interaction data and user/item features and initialize model weights
* param `interactions`: dataframe of observed user/item interactions: [user_id, item_id]
* param `user_features`: dataframe of user metadata features: [user_id, uf_1, ..., uf_n]
* param `item_features`: dataframe of item metadata features: [item_id, if_1, ..., if_n]
* param `sample_weight`: vector of importance weights for each observed interaction
return: None

In [25]:
class RankFM():
    def __init__(self, factors=10, loss='bpr', max_samples=10, alpha=0.01, beta=0.1, sigma=0.1, learning_rate=0.1, learning_schedule='constant', learning_exponent=0.25):
        # store model hyperparameters
        self.factors = factors
        self.loss = loss
        self.max_samples = max_samples
        self.alpha = alpha
        self.beta = beta
        self.sigma = sigma
        self.learning_rate = learning_rate
        self.learning_schedule = learning_schedule
        self.learning_exponent = learning_exponent

        # set/clear initial model state
        self._reset_state()

In [26]:
%%add_to RankFM
def _reset_state(self):
    # [ID, IDX] arrays
    self.user_id = None
    self.item_id = None
    self.user_idx = None
    self.item_idx = None

    # [ID <-> IDX] mappings
    self.index_to_user = None
    self.index_to_item = None
    self.user_to_index = None
    self.item_to_index = None

    # user/item interactions and importance weights
    self.interactions = None
    self.sample_weight = None

    # set of observed items for each user
    self.user_items = None

    # [user, item] features
    self.x_uf = None
    self.x_if = None

    # [item, item-feature] scalar weights
    self.w_i = None
    self.w_if = None

    # [user, item, user-feature, item-feature] latent factors
    self.v_u = None
    self.v_i = None
    self.v_uf = None
    self.v_if = None

    # internal model state indicator
    self.is_fit = False

In [46]:
%%add_to RankFM
def _init_all(self, interactions, user_features=None, item_features=None, sample_weight=None):
    # save unique arrays of users/items in terms of original identifiers
    interactions_df = pd.DataFrame(get_data(interactions), columns=['user_id', 'item_id'])
    self.user_id = pd.Series(np.sort(np.unique(interactions_df['user_id'])))
    self.item_id = pd.Series(np.sort(np.unique(interactions_df['item_id'])))

    # create reverse mappings from identifiers to zero-based index positions
    self.user_to_index = pd.Series(data=self.user_id.index, index=self.index_to_user.values)
    self.item_to_index = pd.Series(data=self.item_id.index, index=self.index_to_item.values)

    # store unique values of user/item indexes and observed interactions for each user
    self.user_idx = np.arange(len(self.user_id), dtype=np.int32)
    self.item_idx = np.arange(len(self.item_id), dtype=np.int32)

    # map the interactions to internal index positions
    self._init_interactions(interactions, sample_weight)

    # map the user/item features to internal index positions
    self._init_features(user_features, item_features)

    # initialize the model weights after the user/item/feature dimensions have been established
    self._init_weights(user_features, item_features)

In [47]:
model = RankFM(factors=20, loss='warp', max_samples=20, alpha=0.01, sigma=0.1, learning_rate=0.10, learning_schedule='invscaling')

In [48]:
model._init_all(interactions_df)

AttributeError: 'NoneType' object has no attribute 'values'

In [None]:
def _init_interactions(self, interactions, sample_weight):
        """map new interaction data to existing internal user/item indexes

        :param interactions: dataframe of observed user/item interactions: [user_id, item_id]
        :param sample_weight: vector of importance weights for each observed interaction
        :return: None
        """

        assert isinstance(interactions, (np.ndarray, pd.DataFrame)), "[interactions] must be np.ndarray or pd.dataframe"
        assert interactions.shape[1] == 2, "[interactions] should be: [user_id, item_id]"

        # map the raw user/item identifiers to internal zero-based index positions
        # NOTE: any user/item pairs not found in the existing indexes will be dropped
        self.interactions = pd.DataFrame(get_data(interactions).copy(), columns=['user_id', 'item_id'])
        self.interactions['user_id'] = self.interactions['user_id'].map(self.user_to_index).astype(np.int32)
        self.interactions['item_id'] = self.interactions['item_id'].map(self.item_to_index).astype(np.int32)
        self.interactions = self.interactions.rename({'user_id': 'user_idx', 'item_id': 'item_idx'}, axis=1).dropna()

        # store the sample weights internally or generate a vector of ones if not given
        if sample_weight is not None:
            assert isinstance(sample_weight, (np.ndarray, pd.Series)), "[sample_weight] must be np.ndarray or pd.series"
            assert sample_weight.ndim == 1, "[sample_weight] must a vector (ndim=1)"
            assert len(sample_weight) == len(interactions), "[sample_weight] must have the same length as [interactions]"
            self.sample_weight = np.ascontiguousarray(get_data(sample_weight), dtype=np.float32)
        else:
            self.sample_weight = np.ones(len(self.interactions), dtype=np.float32)

        # create a dictionary containing the set of observed items for each user
        # NOTE: if the model has been previously fit extend rather than replace the itemset for each user

        if self.is_fit:
            new_user_items = self.interactions.groupby('user_idx')['item_idx'].apply(set).to_dict()
            self.user_items = {user: np.sort(np.array(list(set(self.user_items[user]) | set(new_user_items[user])), dtype=np.int32)) for user in self.user_items.keys()}
        else:
            self.user_items = self.interactions.sort_values(['user_idx', 'item_idx']).groupby('user_idx')['item_idx'].apply(np.array, dtype=np.int32).to_dict()

        # format the interactions data as a c-contiguous integer array for cython use
        self.interactions = np.ascontiguousarray(self.interactions, dtype=np.int32)

In [45]:
def get_data(obj):
    if obj.__class__.__name__ in ('DataFrame', 'Series'):
        data = obj.values
    elif obj.__class__.__name__ == 'ndarray':
        data = obj
    else:
        raise TypeError("input data must be in either pd.dataframe/pd.series or np.ndarray format")
    return data