In [8]:
from pymongo import MongoClient
from bson.son import SON
from bson.code import Code

# Connect to the MongoDB server on localhost:27017 and select the
# `movie_lens` database; every query in this notebook goes through `db`.
client = MongoClient(host='localhost', port=27017)
db = client['movie_lens']

In [10]:
def similarity(I,J):
    """Count the movies rated by both user ``I`` and user ``J``.

    Runs a single aggregation over ``db.ratings`` and lets the server do the
    counting, so only one small summary document is transferred.

    Args:
        I: ``userId`` of the first user.
        J: ``userId`` of the second user.

    Returns:
        int: number of distinct ``movieId`` values that have a rating from
        every requested user. When ``I == J`` this is the number of distinct
        movies that user rated; when nothing matches it is 0.
    """
    # A set collapses I == J into one user, so the "rated by everyone" test
    # below compares against 1 rater instead of an unreachable 2.
    users = list({I, J})
    cursor = db.ratings.aggregate([
        {'$match': {'userId': {'$in': users}}},
        # Collect *distinct* raters per movie: a repeated rating by the same
        # user must not be mistaken for a movie shared by both users.
        {'$group': {'_id': '$movieId', 'raters': {'$addToSet': '$userId'}}},
        {'$match': {'raters': {'$size': len(users)}}},
        # Constant group key: fold all surviving movies into one count.
        {'$group': {'_id': None, 'count': {'$sum': 1}}},
    ])
    # The final $group emits no document at all when no movie qualifies.
    summary = next(iter(cursor), None)
    return summary['count'] if summary else 0

In [42]:
similarity(1,2)

4

In [12]:
def dissimilarity(I,J):
    """Count the movies rated by exactly one of the users ``I`` and ``J``.

    This equals (movies rated by either user) minus (movies rated by both),
    computed in a single aggregation over ``db.ratings`` instead of two
    separate passes.

    Args:
        I: ``userId`` of the first user.
        J: ``userId`` of the second user.

    Returns:
        int: number of distinct ``movieId`` values rated by one of the two
        users but not the other; 0 when ``I == J`` or when neither user has
        any ratings.
    """
    # A user never differs from themself; skip the database round trip.
    if I == J:
        return 0
    cursor = db.ratings.aggregate([
        {'$match': {'userId': {'$in': [I, J]}}},
        # Distinct raters per movie, so duplicate ratings by one user do not
        # make a movie look shared.
        {'$group': {'_id': '$movieId', 'raters': {'$addToSet': '$userId'}}},
        # Exactly one distinct rater == the movie is in the union but not in
        # the intersection.
        {'$match': {'raters': {'$size': 1}}},
        {'$group': {'_id': None, 'count': {'$sum': 1}}},
    ])
    # No summary document is produced when no movie qualifies.
    summary = next(iter(cursor), None)
    return summary['count'] if summary else 0

In [43]:
dissimilarity(1,2)

246

In [15]:
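# One-time setup, left commented out: this cell and the next two build the
# `movie_collector` collection (one summed total per movieId) that the
# `$lookup` in summation_similarity_I reads from.
# mapper=Code('''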
#     function() {
#         emit(this.movieId,1);
#     };
# ''')

In [16]:
# reducer=Code('''
#     function(key,values) {
#         var total=0;
#         for(var i=0;i<values.length;i++){
#             total+=values[i];
#         }
#         return total;
#     };
# ''')

In [17]:
# db.ratings.map_reduce(mapper,reducer,'movie_collector')

In [19]:
def summation_similarity_I(I):
    """Sum, over the movies rated by user ``I``, the per-movie totals stored
    in the ``movie_collector`` collection.

    Each rating of ``I`` is joined to ``movie_collector`` on
    ``movieId == _id`` and the joined ``value`` fields are added up.
    ``movie_collector`` is not created by this function; it is presumably the
    per-movie rating count written by the map-reduce job targeting
    'movie_collector' -- confirm it exists before calling.

    Args:
        I: ``userId`` whose ratings are summed over.

    Returns:
        The summed ``value`` for user ``I``, or 0 when the user has no
        ratings or none of their movies has a ``movie_collector`` entry.
    """
    cursor = db.ratings.aggregate([
        {'$match': {'userId': I}},
        {'$lookup': {
            'from': 'movie_collector',
            'localField': 'movieId',
            'foreignField': '_id',
            'as': 'counting',
        }},
        # `$counting.value` turns the joined documents into a plain array of
        # their `value` fields.
        {'$project': {'userId': 1, 'movieId': 1, 'counting': '$counting.value'}},
        # Unwinding drops ratings whose movie had no match in the lookup.
        {'$unwind': '$counting'},
        {'$group': {'_id': '$userId', 'count': {'$sum': '$counting'}}},
    ])
    # The pipeline yields no document at all when nothing matched; treat
    # that as an empty sum instead of failing on a missing first element.
    totals = next(iter(cursor), None)
    return totals['count'] if totals else 0

In [20]:
def hat_similarity_IJ(I,J):
    """Return ``similarity(I, J)`` divided by ``summation_similarity_I(I)``."""
    shared = similarity(I, J)
    normaliser = summation_similarity_I(I)
    return shared / normaliser

In [44]:
hat_similarity_IJ(1,2)

6.65347061661039e-06

In [22]:
def gamma_I(I):
    """Return the number of rating documents user ``I`` has in ``db.ratings``.

    Args:
        I: ``userId`` to count ratings for.

    Returns:
        int: the rating count, or 0 when the user has no ratings.
    """
    cursor = db.ratings.aggregate([
        {'$match': {'userId': I}},
        {'$group': {'_id': '$userId', 'count': {'$sum': 1}}},
    ])
    # $group emits nothing for a user without ratings; report that as 0
    # instead of failing on a missing first element.
    row = next(iter(cursor), None)
    return row['count'] if row else 0

In [25]:
# Number of distinct users: first collapse the ratings to one document per
# userId, then count those documents under a single constant group key.
cur = db.ratings.aggregate([
    {'$group': {'_id': '$userId'}},
    {'$group': {'_id': 'null', 'count': {'$sum': 1}}},
    {'$sort': {'_id': 1}},
])
num_users = list(cur)[0]['count']

In [26]:
print(num_users)

162541


In [27]:
# Total number of rating documents: group everything under one constant key
# and count the members.
cur = db.ratings.aggregate([
    {'$group': {'_id': 'null', 'count': {'$sum': 1}}},
])
num_ratings = list(cur)[0]['count']

In [28]:
print(num_ratings)

25000095


In [29]:
def summation_dissimilarity_I(I):
    """Return ``num_users * gamma_I(I) + num_ratings - 2 * summation_similarity_I(I)``.

    Relies on the module-level ``num_users`` and ``num_ratings`` values, so
    the cells computing them must have run first.
    """
    return (num_users * gamma_I(I)) + num_ratings - (2 * summation_similarity_I(I))

In [30]:
summation_dissimilarity_I(1)

35175585.0

In [31]:
def hat_dissimilarity_IJ(I,J):
    """Return ``dissimilarity(I, J)`` divided by ``summation_dissimilarity_I(I)``."""
    differing = dissimilarity(I, J)
    normaliser = summation_dissimilarity_I(I)
    return differing / normaliser

In [32]:
hat_dissimilarity_IJ(1,2)

6.993487101920266e-06

In [39]:
def summation_hat_similarity(I,alpha):
    """Sum, over user ``I``'s movies, how often each appears in the movie
    lists of the users recorded for movie ``alpha``.

    Works in two aggregations:

    1. From ``movie_to_users[alpha]``, look up every listed user's document
       in ``user_to_movies``, flatten their movie lists, count occurrences
       per movie and write the counts to the ``temp_coll`` collection.
    2. From ``user_to_movies[I]``, join each of ``I``'s movies against
       ``temp_coll`` and add up the matching counts.

    Side effect: ``temp_coll`` is (re)written by ``$out`` and is NOT dropped
    here; callers are responsible for cleaning it up.

    The repeated ``$unwind`` stages flatten nested arrays; presumably
    ``value`` in both source collections is an array of arrays -- confirm
    against how ``movie_to_users`` / ``user_to_movies`` were built.

    Args:
        I: ``_id`` of the user document in ``user_to_movies``.
        alpha: ``_id`` of the movie document in ``movie_to_users``.

    Returns:
        The summed count, or 0 when none of ``I``'s movies appears in
        ``temp_coll`` (including when ``I`` or ``alpha`` is unknown).
    """
    # Stage 1: per-movie occurrence counts across alpha's users -> temp_coll.
    db.movie_to_users.aggregate([
        {'$match': {'_id': alpha}},
        {'$unwind': '$value'},
        {'$unwind': '$value'},
        {'$project': {'alpha': '$_id', 'userId': '$value', '_id': 0}},
        {'$lookup': {
            'from': 'user_to_movies',
            'localField': 'userId',
            'foreignField': '_id',
            'as': 'J_movie_list',
        }},
        {'$project': {
            'alpha': 1,
            'user_J': '$userId',
            # Array of the joined documents' `value` fields.
            'J_movie_list': '$J_movie_list.value',
        }},
        {'$unwind': '$J_movie_list'},
        {'$unwind': '$J_movie_list'},
        {'$unwind': '$J_movie_list'},
        {'$group': {'_id': '$J_movie_list', 'movieCount': {'$sum': 1}}},
        {'$project': {'movieId': '$_id', 'movieCount': 1}},
        {'$out': 'temp_coll'},
    ])
    # Stage 2: keep only the counts for movies that I also has, and sum them.
    cursor = db.user_to_movies.aggregate([
        {'$match': {'_id': I}},
        {'$unwind': '$value'},
        {'$unwind': '$value'},
        {'$lookup': {
            'from': 'temp_coll',
            'localField': 'value',
            'foreignField': '_id',
            'as': 'Count',
        }},
        # Drops I's movies that have no entry in temp_coll.
        {'$unwind': '$Count'},
        {'$group': {'_id': '$_id', 'count': {'$sum': '$Count.movieCount'}}},
    ])
    # No overlap at all produces an empty result; that is a sum of 0, not an
    # error.
    totals = next(iter(cursor), None)
    return totals['count'] if totals else 0

In [40]:
summation_hat_similarity(1,356)

430531

In [46]:
def positive_recommendation_score(I,alpha):
    """Return ``summation_hat_similarity(I, alpha)`` divided by
    ``summation_similarity_I(I)``.

    ``summation_hat_similarity`` leaves its intermediate counts in the
    ``temp_coll`` collection; this function drops that collection before
    returning, including when one of the helper queries raises.

    Args:
        I: user id the score is computed for.
        alpha: movie id being scored.

    Returns:
        The ratio of the two sums.
    """
    try:
        total_similarity = summation_hat_similarity(I, alpha)
        summation = summation_similarity_I(I)
    finally:
        # Always clean up the scratch collection so a failed query does not
        # leave stale counts behind for the next call.
        db.temp_coll.drop()
    return total_similarity / summation

In [47]:
positive_recommendation_score(1,356)

0.7161313395099719

In [48]:
def summation_union(I,alpha):
    """Count the flattened movie-list entries of every user recorded for
    movie ``alpha``.

    Starting from ``movie_to_users[alpha]``, each listed user is joined to
    their ``user_to_movies`` document, the joined movie lists are flattened,
    and the resulting rows are counted.

    NOTE(review): ``I`` is accepted but never used, so the result does not
    depend on the target user. Despite the name, no union with ``I``'s movies
    is formed -- confirm this is the intended quantity.

    The repeated ``$unwind`` stages flatten nested arrays; presumably
    ``value`` in both collections is an array of arrays -- confirm against
    how ``movie_to_users`` / ``user_to_movies`` were built.

    Args:
        I: unused; kept so existing callers keep working.
        alpha: ``_id`` of the movie document in ``movie_to_users``.

    Returns:
        int: the row count, or 0 when ``alpha`` is unknown or has no users
        with movie lists.
    """
    cursor = db.movie_to_users.aggregate([
        {'$match': {'_id': alpha}},
        {'$unwind': '$value'},
        {'$unwind': '$value'},
        {'$project': {'alpha': '$_id', 'userId': '$value', '_id': 0}},
        {'$lookup': {
            'from': 'user_to_movies',
            'localField': 'userId',
            'foreignField': '_id',
            'as': 'J_movie_list',
        }},
        {'$project': {
            'alpha': 1,
            'user_J': '$userId',
            # Array of the joined documents' `value` fields.
            'J_movie_list': '$J_movie_list.value',
        }},
        {'$unwind': '$J_movie_list'},
        {'$unwind': '$J_movie_list'},
        {'$unwind': '$J_movie_list'},
        # Every remaining row is one (user, movie) entry; count them all
        # under the single alpha key.
        {'$group': {'_id': '$alpha', 'userCount': {'$sum': 1}}},
    ])
    # An unknown movie yields no group document; report an empty total as 0
    # instead of failing on a missing first element.
    totals = next(iter(cursor), None)
    return totals['userCount'] if totals else 0

In [49]:
def negative_recommendation_score(I,alpha):
    """Return ``(summation_union(I, alpha) - summation_hat_similarity(I, alpha))``
    divided by ``summation_dissimilarity_I(I)``.

    ``summation_hat_similarity`` writes its intermediate counts to the
    ``temp_coll`` collection; that collection is dropped here as soon as the
    helper returns or raises, matching what ``positive_recommendation_score``
    does, so no scratch data is left in the database.

    NOTE(review): the numerator subtracts the overlap once from
    ``summation_union``, which itself ignores ``I``; ``dissimilarity`` instead
    counts movies rated by exactly one of the two users. Confirm the two are
    meant to differ.

    Args:
        I: user id the score is computed for.
        alpha: movie id being scored.

    Returns:
        The ratio described above.
    """
    try:
        total_similarity = summation_hat_similarity(I, alpha)
    finally:
        # Remove the scratch collection whether or not the query succeeded.
        db.temp_coll.drop()
    total_union = summation_union(I, alpha)
    numerator = total_union - total_similarity
    denominator = summation_dissimilarity_I(I)
    return numerator / denominator

In [50]:
negative_recommendation_score(1,356)

0.5168711195563628

In [52]:
def Advanced_Collaborative_Filtering(user_I,movie_J,lambda_):
    """Combine the positive and negative recommendation scores of
    ``movie_J`` for ``user_I`` as ``positive + lambda_ * negative``.
    """
    # The positive score is evaluated first, then the negative one.
    pos_score = positive_recommendation_score(user_I, movie_J)
    neg_score = negative_recommendation_score(user_I, movie_J)
    return pos_score + (lambda_ * neg_score)

In [54]:
lambda_=-0.9
Advanced_Collaborative_Filtering(1,356,lambda_)

0.25094733190924534