In [1]:
import logging
import os
import pickle as pkl
from datetime import datetime

import numpy as np
import pandas as pd
import pymongo

# Handle to the 'ltcLongevity' database, opened through a MongoClient() created with default settings.
db = pymongo.MongoClient()['ltcLongevity']

# Weekday id -> short Russian weekday label (used to translate the 'days' filter ids).
DAYS_ID_DICT = dict(
    (entry['id'], entry['label'])
    for entry in (
        {'id': 0, 'label': 'Пн'},
        {'id': 1, 'label': 'Вт'},
        {'id': 2, 'label': 'Ср'},
        {'id': 3, 'label': 'Чт'},
        {'id': 4, 'label': 'Пт'},
        {'id': 5, 'label': 'Сб'},
        {'id': 6, 'label': 'Вс'},
    )
)

# District id -> district name, read from the 'districtDict' document of the vars collection.
DISTRICT_DICT = dict(
    (entry['id'], entry['name'])
    for entry in db.vars.find_one({'id': 'districtDict'})['value']
)

# Area id -> area name, read from the 'areaDict' document of the vars collection.
AREA_DICT = dict(
    (entry['id'], entry['name'])
    for entry in db.vars.find_one({'id': 'areaDict'})['value']
)

# Multiplicative weight decay applied per attend, going from the most recent
# attend to older ones, when date attenuation is enabled in ModelMain.
DATE_ATTENUATION_PARAM = 0.9

def ageClassFunction(user = None, userId = None):
    """Return the age-bracket label of a user.

    Args:
        user: user document containing a 'dateBirth' datetime. When omitted,
            the document is fetched from db.usersV2 by ``userId``.
        userId: id used for the lookup when ``user`` is not given.

    Returns:
        '<60' for ages below 60, '>90' for ages above 90, otherwise the
        enclosing decade formatted as 'lo-hi' (for example '60-70').
    """
    if user is None:
        user = db.usersV2.find_one({'userId': userId})

    # Age in (fractional) years, counted in whole days since the birth date.
    years = (datetime.now() - user['dateBirth']).days / 365.25

    if years < 60:
        return '<60'
    elif years > 90:
        return '>90'

    decadeStart = years // 10  * 10
    decadeEnd = (years // 10 + 1)  * 10
    return '%d-%d'%(decadeStart, decadeEnd)

class ModelMeta:
    """Per-class prior over d3LevelId indices.

    Every user is assigned a class by ``classFunction``; each class is mapped
    to a normalised vector (summing to 1) that counts how often the users of
    that class appear with each d3LevelId in the training frame.
    """

    def __init__(self, db = None, d3Id2Idx = None, modelPath = None):
        """Create an empty model, or load a saved one.

        Args:
            db: database handle exposing a ``usersV2`` collection (required).
            d3Id2Idx: mapping d3LevelId -> vector index.
            modelPath: folder containing d3Id2Idx.pkl, classFunction.pkl and
                dictClasses.pkl. When given, the loaded d3Id2Idx replaces the
                ``d3Id2Idx`` argument.

        Raises:
            Exception: if ``db`` is None.
        """
        if db is None:
            raise Exception('Model need DB connection to work')

        self.db = db
        self.d3Id2Idx = d3Id2Idx

        if modelPath is not None:
            # NOTE(review): unpickling executes arbitrary code - only load
            # model folders that come from a trusted source.
            self.d3Id2Idx = self._loadPickle(modelPath, 'd3Id2Idx')
            self.classFunction = self._loadPickle(modelPath, 'classFunction')
            self.dictClasses = self._loadPickle(modelPath, 'dictClasses')

    @staticmethod
    def _loadPickle(modelPath, name):
        """Read ``{modelPath}/{name}.pkl``, closing the file afterwards."""
        with open(f'{modelPath}/{name}.pkl', 'rb') as fh:
            return pkl.load(fh)

    @staticmethod
    def _dumpPickle(obj, modelPath, name):
        """Write ``obj`` to ``{modelPath}/{name}.pkl``, closing the file afterwards."""
        with open(f'{modelPath}/{name}.pkl', 'wb') as fh:
            pkl.dump(obj, fh)

    def fit(self, dfm, classFunction):
        """Build the per-class distributions.

        Args:
            dfm: DataFrame with at least the columns 'userId' and 'd3LevelId'
                (one row per attended d3LevelId).
            classFunction: callable invoked as ``classFunction(user = doc)``
                during fitting and as ``classFunction(userId = id)`` at
                prediction time; returns the (hashable) class of the user.

        Returns:
            None. Sets ``classFunction``, ``classes``, ``userClasses`` and
            ``dictClasses`` on the instance.
        """
        self.classFunction = classFunction
        self.classes = set()
        self.userClasses = {}

        for user in self.db.usersV2.find({}):
            cls = self.classFunction(user = user)
            self.classes.add(cls)
            self.userClasses[user['userId']] = cls

        # Explicit columns keep the merge valid even when there are no users.
        dfClasses = pd.DataFrame(
            [{'userId': k, 'cls': v} for k, v in self.userClasses.items()],
            columns = ['userId', 'cls'],
        )
        # Join the training frame passed by the caller with the class labels.
        dfClasses = dfm.merge(dfClasses, on = 'userId', how = 'inner')

        dictClasses = {}
        for cls, d3LevelIds in dfClasses.groupby('cls')['d3LevelId']:
            vec = np.zeros(len(self.d3Id2Idx))
            for d3LevelId in d3LevelIds:
                vec[self.d3Id2Idx[d3LevelId]] += 1
            dictClasses[cls] = vec / np.sum(vec)

        self.dictClasses = dictClasses

        return None

    def save(self, modelPath):
        """Persist the model into ``modelPath`` so that ``ModelMeta(modelPath=...)`` can load it.

        Writes classFunction.pkl, dictClasses.pkl and d3Id2Idx.pkl. The folder
        is created when missing; a failure to create it is logged and the
        subsequent file write raises.
        """
        try:
            os.makedirs(modelPath, exist_ok = True)
        except OSError:
            logging.exception('Unable to create folder for model')

        self._dumpPickle(self.classFunction, modelPath, 'classFunction')
        self._dumpPickle(self.dictClasses, modelPath, 'dictClasses')
        # __init__ reads d3Id2Idx.pkl from the same folder, so it has to be saved too.
        self._dumpPickle(self.d3Id2Idx, modelPath, 'd3Id2Idx')

    def __call__(self, userId: str):
        """Return the distribution vector of the class ``userId`` belongs to.

        Raises:
            KeyError: if the user's class has no entry in ``dictClasses``.
        """
        cls = self.classFunction(userId = userId)
        return self.dictClasses[cls]

def multiplyProbVecs(v1, v2):
    """Re-weight ``v1`` by the odds of ``v2`` and renormalise.

    Each element of ``v1`` is multiplied by ``v2 / (1 - v2)`` and the result
    is divided by its sum.

    Args:
        v1: array-like of weights; it is not modified.
        v2: array-like of the same shape; elements equal to 1 produce a
            division by zero.

    Returns:
        A new float array that sums to 1.
    """
    v1 = np.asarray(v1, dtype = float)
    v2 = np.asarray(v2, dtype = float)

    # Build a new array instead of `v1 *= ...` so the caller's vector
    # (for example a distribution stored inside a model) stays intact.
    combined = v1 * (v2 / (1 - v2))
    return combined / combined.sum()

def getAttends(userId: str):
    """Return the attends of a user as a list of {'d3LevelId', 'date'} dicts.

    The records are read from db.attends, filtered by 'userId', in the order
    the query returns them.
    """
    attends = []
    for record in db.attends.find({'userId': userId}):
        attends.append({'d3LevelId': record['d3LevelId'], 'date': record['date']})
    return attends

class ModelMain():
    """Attendance-based recommender.

    Attributes:
        d3LevelCounts: container indexed by d3LevelId index; each entry is a
            numeric vector over d3LevelId indices (presumably co-attendance
            counts - confirm against the training code).
        probTotal: reference distribution over d3LevelId indices that a
            user's profile is compared against.
        d3Id2Idx: mapping d3LevelId -> vector index.
    """

    def __init__(self, d3LevelCounts = None, probTotal = None, d3Id2Idx = None, modelPath = None):
        """Store the given components, or load them from ``modelPath``.

        When ``modelPath`` is given, d3LevelCounts.pkl, probTotal.pkl and
        d3Id2Idx.pkl from that folder override the other arguments.
        """
        self.d3LevelCounts = d3LevelCounts
        self.d3Id2Idx = d3Id2Idx
        self.probTotal = probTotal

        if modelPath is not None:
            # NOTE(review): unpickling executes arbitrary code - only load
            # model folders that come from a trusted source.
            self.d3LevelCounts = self._loadPickle(modelPath, 'd3LevelCounts')
            self.probTotal = self._loadPickle(modelPath, 'probTotal')
            self.d3Id2Idx = self._loadPickle(modelPath, 'd3Id2Idx')

    @staticmethod
    def _loadPickle(modelPath, name):
        """Read ``{modelPath}/{name}.pkl``, closing the file afterwards."""
        with open(f'{modelPath}/{name}.pkl', 'rb') as fh:
            return pkl.load(fh)

    @staticmethod
    def _dumpPickle(obj, modelPath, name):
        """Write ``obj`` to ``{modelPath}/{name}.pkl``, closing the file afterwards."""
        with open(f'{modelPath}/{name}.pkl', 'wb') as fh:
            pkl.dump(obj, fh)

    def recomendForUser(
        self,
        userId: int, 
        best: int = 5, 
        rare: int = 5,
        randomChoice = False,
        dateAttenuation = True
    ):
        """Compute the "best" and "rare" score vectors for a user.

        The user's attends are fetched with ``getAttends``; entries of items
        the user already attended are set to 0 in both vectors. The vectors
        are not renormalised after that zeroing.

        Args:
            userId: id of the user.
            best, rare, randomChoice: accepted but not used by this method.
            dateAttenuation: down-weight older attends.

        Returns:
            Tuple ``(probVecBest, probVecRare)``.
        """
        attends = getAttends(userId)

        probVecBest = self.recommendVectorBest(attends, dateAttenuation = dateAttenuation)
        probVecRare = self.recommendVectorRare(attends, dateAttenuation = dateAttenuation)

        for a in attends:
            idx = self.d3Id2Idx[a['d3LevelId']]
            probVecBest[idx] = 0
            probVecRare[idx] = 0

        return probVecBest, probVecRare

    @staticmethod
    def recommendFromVector(probVec, n: int = 5, randomChoice = False):
        """Pick ``n`` indices from a score vector.

        Declared static so it works both as ``ModelMain.recommendFromVector(vec)``
        and on an instance.

        Args:
            probVec: 1-D array of scores; must sum to 1 when ``randomChoice``
                is set, because it is passed as ``p`` to ``np.random.choice``.
            n: number of indices to return.
            randomChoice: sample without replacement instead of taking the
                top-``n`` scores.

        Returns:
            List of indices; for the non-random case ordered by descending score.
        """
        if randomChoice:
            res = np.random.choice(list(range(probVec.shape[0])), n, p = probVec, replace = False)
        else:
            res = np.argsort(probVec)[::-1][:n]

        return list(res)

    def _attendProfile(self, attends: list, dateAttenuation = False):
        """Weighted, normalised sum of the d3LevelCounts rows of the attended items.

        Attends are ordered from newest to oldest; with ``dateAttenuation``
        the i-th attend in that order gets weight DATE_ATTENUATION_PARAM ** i,
        otherwise every attend has weight 1. ``attends`` is expected to be
        non-empty; an empty list does not yield a meaningful vector.
        """
        ordered = sorted(attends, key = lambda x: x['date'])[::-1]

        rows = np.array([self.d3LevelCounts[self.d3Id2Idx[a['d3LevelId']]] for a in ordered])

        if dateAttenuation:
            # Float weights keep the normalisation below valid for integer count rows too.
            weights = DATE_ATTENUATION_PARAM ** np.arange(len(ordered))
        else:
            weights = np.ones(len(ordered))

        profile = rows.transpose().dot(weights)
        return profile / np.sum(profile)

    def recommendVectorBest(self, attends: list, dateAttenuation = False):
        """Return the user's normalised profile vector (high value = close to what was attended).

        Args:
            attends: list of dicts with 'd3LevelId' and 'date'.
            dateAttenuation: down-weight older attends.
        """
        return self._attendProfile(attends, dateAttenuation = dateAttenuation)

    def recommendVectorRare(self, attends: list, dateAttenuation = False):
        """Return the normalised shortfall of the user's profile against ``probTotal``.

        Entries where the profile is at or above ``probTotal`` are 0; the
        remaining entries hold ``probTotal - profile`` and the vector is
        scaled to sum to 1.

        Args:
            attends: list of dicts with 'd3LevelId' and 'date'.
            dateAttenuation: down-weight older attends.
        """
        profile = self._attendProfile(attends, dateAttenuation = dateAttenuation)

        shortfall = np.maximum(self.probTotal - profile, 0)
        return shortfall / shortfall.sum()

    def recommendForUserNew(self):
        """Placeholder for users without attends; currently always returns None."""
        return None

    def save(self, modelPath):
        """Persist d3LevelCounts, probTotal and d3Id2Idx as pickles in ``modelPath``.

        The folder is created when missing; a failure to create it is logged
        and the subsequent file write raises.
        """
        try:
            os.makedirs(modelPath, exist_ok = True)
        except OSError:
            logging.exception('Unable to create folder for model')

        self._dumpPickle(self.d3LevelCounts, modelPath, 'd3LevelCounts')
        self._dumpPickle(self.probTotal, modelPath, 'probTotal')
        self._dumpPickle(self.d3Id2Idx, modelPath, 'd3Id2Idx')
        
def recommendFromVector(probVec, n: int = 5, randomChoice = False):
    """Pick ``n`` indices from a score vector.

    With ``randomChoice`` the indices are sampled without replacement using
    ``probVec`` as the sampling probabilities; otherwise the indices of the
    ``n`` largest scores are returned in descending score order.
    """
    if not randomChoice:
        ranked = np.argsort(probVec)[::-1]
        return list(ranked[:n])

    candidates = list(range(probVec.shape[0]))
    picked = np.random.choice(candidates, n, p = probVec, replace = False)
    return list(picked)

def recommendFiltersd3LevelIds(filters):
    """Translate search filters into the d3LevelIds that have a matching schedule entry.

    Activity-level filters ('d0LevelId', 'd1LevelId', 'd2LevelId', 'online',
    'certificate') are resolved to a set of d3LevelIds through db.activities;
    that set is then combined with the schedule-level filters ('days',
    'district', 'area') in a query on db.schedule. Keys that are missing or
    None are ignored.

    Note: the id lists under 'd1LevelId', 'd2LevelId', 'days', 'district' and
    'area' are replaced by lists of int inside the caller's ``filters`` dict.

    Returns:
        None when no filter is active, otherwise a list of distinct
        d3LevelIds (in no particular order).
    """

    def isSet(key):
        return (key in filters) and (filters[key] is not None)

    def toInts(key):
        # Stores the int-converted list back into the caller's dict and returns it.
        filters[key] = list(map(int, filters[key]))
        return filters[key]

    activityQuery = {}

    if isSet('d0LevelId'):
        activityQuery['d0LevelName'] = {'$in': filters['d0LevelId']}

    for levelKey in ('d1LevelId', 'd2LevelId'):
        if isSet(levelKey):
            activityQuery[levelKey] = {'$in': toInts(levelKey)}

    for flagKey in ('online', 'certificate'):
        if isSet(flagKey):
            # The stored flag is True exactly when the filter value equals 0.
            activityQuery[flagKey] = filters[flagKey] == 0

    if activityQuery:
        matched = db.activities.find(activityQuery, {'d3LevelId': 1})
        d3LevelIds = {doc['d3LevelId'] for doc in matched}
    else:
        d3LevelIds = None

    scheduleQuery = {}

    # A pile of workarounds, but this form is more convenient for the search
    # method: ids are translated into the labels/names matched on the schedule.
    if isSet('days'):
        scheduleQuery['schedule.day'] = {'$in': [DAYS_ID_DICT[dayId] for dayId in toInts('days')]}

    if isSet('district'):
        scheduleQuery['district'] = {'$in': [DISTRICT_DICT[districtId] for districtId in toInts('district')]}

    if isSet('area'):
        scheduleQuery['area'] = {'$in': [AREA_DICT[areaId] for areaId in toInts('area')]}

    if d3LevelIds is not None:
        scheduleQuery['d3LevelId'] = {'$in': list(d3LevelIds)}

    if not scheduleQuery:
        return None

    rows = list(db.schedule.find(scheduleQuery, {'_id': False}))

    return list({row['d3LevelId'] for row in rows if 'd3LevelId' in row})



# Load the persisted models from the local ./model folder.
modelMain = ModelMain(
    modelPath = './model/modelMain',
)
modelAge = ModelMeta(
    db = db,
    modelPath = './model/modelAge',
)

# d3LevelId -> vector index, and the inverse lookup (vector index -> d3LevelId).
d3Id2Idx = modelMain.d3Id2Idx
d3Id2IdxReverse = dict((idx, d3LevelId) for d3LevelId, idx in d3Id2Idx.items())

In [11]:
# filters = {'q': None, 'd0LevelName': None, 'd1LevelName': None, 'd2LevelName': None, 'online': None, 'certificate': None, 'days': None, 'district': ['3'], 'area': None}
# userId = 101387513

# recs = recommendFilters(filters, userId)

73


In [12]:
# userId = 101387513

# print('----Посещенные курсы----')
# for i in set([a['d3LevelId'] for a in getAttends(userId)]):
#     print(i, db.activities.find_one({'d3LevelId': i})['d3LevelName'])

# print('----Рекомендованные курсы----')
# for i in recs:
#     print(i, db.activities.find_one({'d3LevelId': int(i)})['d3LevelName'])

----Посещенные курсы----
1154 ОНЛАЙН Английский язык
----Рекомендованные курсы----
1083 ОНЛАЙН Гимнастика
1421 ОНЛАЙН Краеведение и онлайн-экскурсии
1162 ОНЛАЙН История искусства
1157 ОНЛАЙН Здорово жить
1155 ОНЛАЙН Английский язык для начинающих
1083 ОНЛАЙН Гимнастика
1157 ОНЛАЙН Здорово жить
1089 ОНЛАЙН Оздоровительная гимнастика
120 Цигун
1427 ОНЛАЙН Иные интеллектуальные игры


In [15]:
# Fetch the profile document of a single user by userId.
# NOTE(review): the saved output of this cell shows personal data (name,
# birth date, address) - clear the output before sharing the notebook.
db.usersV2.find_one({'userId': 101387513})

{'_id': ObjectId('646ffbea0ddba6d6eda28a60'),
 'userId': 101387513,
 'dateCreated': datetime.datetime(2019, 1, 24, 16, 15, 59),
 'surName': 'Логинова',
 'name': 'Екатерина',
 'thirdName': 'Марковна',
 'gender': 'Женщина',
 'dateBirth': datetime.datetime(1947, 2, 23, 0, 0),
 'address': 'город москва, одоевского пр., дом 7, строение 4'}