In [7]:
from itertools import permutations

import numpy as np
import pandas as pd


class ItemCollaborativeFiltering:
    """Item-to-item recommender scored on observed vs. expected shared users.

    For an ordered pair ``(item, recommended_item)`` the number of users who
    interacted with both items is compared with the number expected if
    ``recommended_item`` were picked independently, with its overall
    interaction probability, on each of the other interactions made by the
    users of ``item``.

    Parameters
    ----------
    item_column : str
        Name of the column holding item identifiers.
    user_column : str
        Name of the column holding user identifiers.

    Attributes
    ----------
    df_recommendations : pandas.DataFrame
        Result of the most recent ``fit_recommendations`` call (empty before
        the first call).
    item_users, item_probabilities, user_interactions : dict
        Lookup tables built by ``fit_recommendations``.
    """

    # Column layout of the frame returned by ``fit_recommendations``.
    _RECOMMENDATION_COLUMNS = (
        'item',
        'recommended_item',
        'count_common_users',
        'expected_common_users',
        'score',
    )

    def __init__(self, item_column='item_id', user_column='user_id'):
        self.item_column = item_column
        self.user_column = user_column
        self.df_recommendations = pd.DataFrame()

    def __fitted_state(self, attribute, hint):
        """Return ``self.<attribute>`` or raise ``AttributeError`` carrying ``hint``."""
        try:
            return getattr(self, attribute)
        except AttributeError as error:
            raise AttributeError(hint) from error

    def __generate_item_pairs(self, df, item):
        """Build the ordered item pairs to score.

        With ``item`` given, pairs are ``(item, other)`` for every other item
        in ``df``; otherwise every ordered pair of distinct items is returned.
        """
        unique_items = df[self.item_column].unique()
        if item is not None:
            return [(item, paired_item) for paired_item in unique_items if paired_item != item]
        return list(permutations(unique_items, 2))

    def __calculate_item_users(self, df):
        """Store ``self.item_users``: item -> set of users that interacted with it."""
        self.item_users = (
            df.groupby(self.item_column)[self.user_column]
            .agg(lambda users: set(users))
            .to_dict()
        )

    def __count_common_item_pair_users(self, item_pair):
        """Return ``(item_pair, n)`` where ``n`` is the number of users shared by both items."""
        item_users = self.__fitted_state(
            'item_users',
            "Item users are not available; call fit_recommendations() first",
        )
        item1, item2 = item_pair
        common_users_count = len(item_users[item1].intersection(item_users[item2]))
        return item_pair, common_users_count

    def __calculate_item_probabilities(self, df):
        """Store ``self.item_probabilities``: item -> share of all users that interacted with it."""
        users_per_item = df.groupby(self.item_column)[self.user_column].nunique()
        total_users = df[self.user_column].nunique()
        self.item_probabilities = (users_per_item / total_users).to_dict()

    def __item_interaction_probability(self, item):
        """Return the stored interaction probability of ``item``."""
        item_probabilities = self.__fitted_state(
            'item_probabilities',
            "Item probabilities are not available; call fit_recommendations() first",
        )
        return item_probabilities[item]

    def __calculate_user_interactions(self, df):
        """Store ``self.user_interactions``: user -> number of distinct items minus one."""
        # subtract 1 to count number of interactions with other items DIFFERENT from item
        interactions_count = df.groupby(self.user_column)[self.item_column].nunique() - 1
        self.user_interactions = interactions_count.to_dict()

    def __count_users_interactions(self, df, item):
        """Return, for each distinct user of ``item``, their count of other items.

        Users are de-duplicated so that repeated (user, item) rows do not
        count the same user more than once.
        """
        user_interactions = self.__fitted_state(
            'user_interactions',
            "User interactions are not available; call fit_recommendations() first",
        )
        item_rows = df.loc[df[self.item_column] == item, self.user_column]
        return np.array([user_interactions[user] for user in item_rows.unique()])

    def __expected_common_item_pair_users(self, df, item_pair, interactions_count=None):
        """Expected number of users shared by ``item_pair`` under independence.

        ``interactions_count`` may be supplied to reuse the per-user counts
        already computed for the first item of the pair; when omitted it is
        derived from ``df``.
        """
        item1, item2 = item_pair
        product_probability = self.__item_interaction_probability(item2)
        if interactions_count is None:
            interactions_count = self.__count_users_interactions(df, item1)
        # A user with k other interactions misses item2 on all of them with
        # probability (1 - p) ** k.
        return np.sum(1 - (1 - product_probability) ** interactions_count)

    @staticmethod
    def __recommendations_score_function(expected_users, actual_users):
        """Score = (actual - expected) * log(actual + 0.1) / sqrt(expected)."""
        return (actual_users - expected_users) * np.log(actual_users + 0.1) / np.sqrt(expected_users)

    def fit_recommendations(self, df, item=None):
        """Score item pairs found in ``df``.

        Parameters
        ----------
        df : pandas.DataFrame
            Interactions; must contain ``self.user_column`` and
            ``self.item_column``. Rows with a missing user or item are ignored.
        item : optional
            When given, only pairs whose first element is ``item`` are scored.

        Returns
        -------
        pandas.DataFrame
            One row per ordered pair with at least one shared user, with the
            columns ``item``, ``recommended_item``, ``count_common_users``,
            ``expected_common_users`` and ``score``. The frame is empty (same
            columns) when there is no data or no pair shares a user. The
            result is also stored in ``self.df_recommendations``.

        Raises
        ------
        KeyError
            If ``item`` is given but does not occur in ``df``.
        """
        empty_result = pd.DataFrame(columns=list(self._RECOMMENDATION_COLUMNS))

        df = df.dropna(subset=[self.user_column, self.item_column])
        if df.empty:
            self.df_recommendations = empty_result
            return empty_result

        self.__calculate_item_users(df)
        self.__calculate_item_probabilities(df)
        self.__calculate_user_interactions(df)

        if item is not None and item not in self.item_users:
            raise KeyError(
                "item {!r} not found in column {!r}".format(item, self.item_column)
            )

        item_pairs = self.__generate_item_pairs(df, item)

        # [((item1, item2), common_users)], keeping only pairs with shared users
        shared_counts = [self.__count_common_item_pair_users(pair) for pair in item_pairs]
        shared_counts = [entry for entry in shared_counts if entry[1] > 0]
        if not shared_counts:
            # zip(*[]) cannot be unpacked into two names, so stop here.
            self.df_recommendations = empty_result
            return empty_result

        filtered_item_pairs, count_pair_users = zip(*shared_counts)

        # The per-user counts depend only on the first item of a pair, so
        # compute them once per item instead of once per pair.
        interactions_by_item = {}
        expected_pair_users = []
        for pair in filtered_item_pairs:
            source_item = pair[0]
            if source_item not in interactions_by_item:
                interactions_by_item[source_item] = self.__count_users_interactions(df, source_item)
            expected_pair_users.append(
                self.__expected_common_item_pair_users(df, pair, interactions_by_item[source_item])
            )

        pair_score = self.__recommendations_score_function(
            np.array(expected_pair_users), np.array(count_pair_users)
        )

        items, recommended_items = zip(*filtered_item_pairs)

        df_recommendations = pd.DataFrame({
            'item': items,
            'recommended_item': recommended_items,
            'count_common_users': count_pair_users,
            'expected_common_users': expected_pair_users,
            'score': pair_score
        })

        self.df_recommendations = df_recommendations
        return df_recommendations

    @staticmethod
    def recommend(df_recommendations, item, n_recommendations=10):
        """Return up to ``n_recommendations`` recommended items for ``item``.

        ``df_recommendations`` is a frame as produced by
        ``fit_recommendations``; the result is a numpy array of
        ``recommended_item`` values ordered by descending ``score``.
        """
        # Bracket access avoids any clash between the 'item' column and
        # DataFrame attributes.
        item_rows = df_recommendations[df_recommendations['item'] == item]
        ranked = item_rows.sort_values('score', ascending=False)
        return ranked.head(n_recommendations)['recommended_item'].values


In [8]:
# NOTE: absolute path on the author's machine; adjust before running elsewhere.
songs_csv_path = r"C:\Users\dilini\Desktop\final year project\songs.csv"
df = pd.read_csv(songs_csv_path)
df.head()

Unnamed: 0,user_id,age_group,gender,profession,hours_spending,musical_aspect,song_id
0,10001,18 - 24,Female,Student,0 - 1,Tempo/speed,174
1,10001,18 - 24,Female,Student,0 - 1,Tempo/speed,220
2,10002,25 - 34,Male,Working,2 - 3,The singer's voice,221
3,10003,18 - 24,Female,Software Engineer,more than 5,The singer's voice,164
4,10004,18 - 24,Male,software engineer,more than 5,The singer's voice,6


In [9]:
# Keep only the user and song identifier columns used by the recommender.
interaction_columns = ['user_id', 'song_id']
dataset = df[interaction_columns]
dataset.head()

Unnamed: 0,user_id,song_id
0,10001,174
1,10001,220
2,10002,221
3,10003,164
4,10004,6


In [20]:
if __name__ == "__main__":
    # Score every song pair in `dataset`, then look up recommendations for song 174.
    test = ItemCollaborativeFiltering(user_column="user_id", item_column="song_id")
    df_recommendations = test.fit_recommendations(df=dataset)
    recommended_items = test.recommend(df_recommendations, item=174)

In [23]:
# Five highest-scoring (item, recommended_item) pairs.
df_recommendations.sort_values(by='score', ascending=False).head(5)

Unnamed: 0,item,recommended_item,count_common_users,expected_common_users,score
25,164,179,2,0.062012,5.774063
100,26,210,2,0.096519,4.545782
110,210,191,2,0.108463,4.261296
324,179,164,2,0.124023,3.952235
279,191,210,2,0.133323,3.793015


In [24]:
# Recommended song ids for song 174, as returned by test.recommend above.
recommended_items

array([179, 140,  79, 227, 220,  88, 165, 170,  56, 306], dtype=int64)