# <center>Music Recommender System</center>

We draw inspiration from Spotify's "Discover Weekly" service and aim to recommend a list of songs to each user according to their preferences.

In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from copy import copy, deepcopy
import random
from scipy.stats import norm
import pandas as pd

## Import Data 

In [2]:
# Read the songs data set from a CSV file located relative to the working directory,
# then preview the first rows.
df = pd.read_csv("songs_data.csv")
df.head()

Unnamed: 0,user_id,item_id,listening_time_in_scale_0-5,title,duration,event_type
0,196,242,2,Atmosphere Station,191.81669,"""Listen it frequently""""View Song Information""""..."
1,63,242,1,Atmosphere Station,191.81669,"""Listen it frequently""""Like""""View Song Informa..."
2,226,242,0,Atmosphere Station,191.81669,"""Listen it frequently""""Add it to your music"""
3,154,242,4,Atmosphere Station,191.81669,"""Listen it frequently""""Like""""View Song Informa..."
4,306,242,2,Atmosphere Station,191.81669,"""Have listened it completely""""Like""""View Song..."


In [3]:
# Count how many rows carry each distinct raw event_type string.
df['event_type'].value_counts()

"Listen it frequently""Like""View Song Information"                                            2955
"Skip it""Not recommend it anymore"                                                            2915
"Listen it frequently""Download"                                                               2878
"Listen it frequently""View Song Information""Download"                                        2874
"Have listened  it completely""Like""Add it to your music"                                     2851
"Have listened  it completely""Like""Add it to your music""Download"                           2847
"Listen it frequently""Like""Download"                                                         2842
"Listen it frequently""Add it to your music"                                                   2823
"Listen it frequently"                                                                         2822
"Listen it frequently""Like""View Song Information""Add it to your music"                      2815


In [4]:
# Summarise the frame: column dtypes, non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 6 columns):
user_id                        100000 non-null int64
item_id                        100000 non-null int64
listening_time_in_scale_0-5    100000 non-null int64
title                          100000 non-null object
duration                       100000 non-null float64
event_type                     100000 non-null object
dtypes: float64(1), int64(3), object(2)
memory usage: 4.6+ MB


In [5]:
# Number of rows recorded for each user_id.
df['user_id'].value_counts()

405    737
655    685
13     636
450    540
276    518
      ... 
147     20
19      20
572     20
636     20
895     20
Name: user_id, Length: 943, dtype: int64

In [6]:
# Number of rows recorded for each item_id.
df['item_id'].value_counts()

50      583
258     509
100     508
181     507
294     485
       ... 
1648      1
1571      1
1329      1
1457      1
1663      1
Name: item_id, Length: 1682, dtype: int64

In [7]:
# Number of distinct item ids in the data set.
df['item_id'].nunique()

1682

## Data Preprocessing 

Define strength for each event in 'event_type'

Quantify implicit event feedback 

In [8]:
# event --> strength
# Weight assigned to each implicit-feedback event; the weights of all events
# attached to a row are summed to obtain that row's overall strength.
event_type_strength = dict([
    # events counted as positive feedback
    ('Listen it frequently', 3.0),
    ('Have listened  it completely', 1.0),
    ('Like', 1.0),
    ('View Song Information', 1.0),
    ('Download', 3.0),
    ('Add it to your music', 2.0),
    # events counted as negative feedback
    ('Skip it', -1.0),
    ('Skip it frequently', -3.0),
    ('Not recommend it anymore', -2.0),
])

In [9]:
# extract event into a list from a string
# Each raw value is a run of double-quoted labels ("A""B""C"); splitting on the
# quote character and dropping the empty pieces yields ['A', 'B', 'C'].
# The column is overwritten in place, so values that are already lists are passed
# through unchanged: re-running this cell no longer fails on `list.split`.
df['event_type'] = df['event_type'].apply(
    lambda x: x if isinstance(x, list) else [e for e in x.split("\"") if len(e)])

# add a column event_strength
# Sum of the per-event weights; an event missing from event_type_strength raises KeyError.
df['event_strength'] = df['event_type'].apply(lambda x: np.sum([event_type_strength[e] for e in x]))

In [10]:
# stats on event strength
# Distribution of event strength: number of rows per distinct summed value.
df['event_strength'].value_counts()

 7.0     14115
 5.0     13980
 6.0     13854
 4.0     13735
 8.0      8337
 3.0      8264
-3.0      5684
 9.0      5531
 2.0      5450
-5.0      2810
-1.0      2761
 1.0      2760
 10.0     2719
Name: event_strength, dtype: int64

In [11]:
# Preview the frame again: event_type now holds lists and event_strength has been added.
df.head()

Unnamed: 0,user_id,item_id,listening_time_in_scale_0-5,title,duration,event_type,event_strength
0,196,242,2,Atmosphere Station,191.81669,"[Listen it frequently, View Song Information, ...",7.0
1,63,242,1,Atmosphere Station,191.81669,"[Listen it frequently, Like, View Song Informa...",5.0
2,226,242,0,Atmosphere Station,191.81669,"[Listen it frequently, Add it to your music]",5.0
3,154,242,4,Atmosphere Station,191.81669,"[Listen it frequently, Like, View Song Informa...",7.0
4,306,242,2,Atmosphere Station,191.81669,"[Have listened it completely, Like, View Song...",5.0


## Spotify environment 

In [12]:
def random_argmax(rng, list_):
    """Return the index of a maximum of ``list_``, breaking ties at random.

    Parameters
    ----------
    rng : object exposing ``choice(sequence)`` (e.g. ``np.random.RandomState``)
        Source of randomness used to pick among tied maxima.
    list_ : array-like
        One-dimensional collection of comparable values. Plain Python lists
        are accepted as well as NumPy arrays.

    Returns
    -------
    Index (into the flattened input) of one of the maximal elements.

    Raises
    ------
    ValueError
        If ``list_`` is empty (raised by ``max`` on a zero-size array).
    """
    # Coerce first: a plain list has no .max() and `list == scalar` is not element-wise.
    values = np.asarray(list_)
    best_positions = np.flatnonzero(values == values.max())
    return rng.choice(best_positions)

In [13]:
# Print NumPy floating-point values with two digits of precision.
np.set_printoptions(precision=2)

In [15]:
class music_rec_env:
    r""" A rating environment that simulates Spotify.

        Users and items are represented by points in R^k
        (k = ``internal_embedding_size``). User interest for a given item is
        modeled by a parametric function R_{u,i} = f(u,i) = f(W_u, W_i); here
        f is the dot product of the two embeddings,

            R_{u,i} = \sum_k w_{u,k} . w_{i,k}

        and ratings are returned without noise.

        Protocol: call ``reset()`` once, then ``step(action_list)`` repeatedly
        until it reports ``done=True``. Each step recommends songs to the
        current user among those not yet recommended to that user, then moves
        on to a randomly drawn user that still has songs left.

        The observable state is a list of ``[user, item, variables]`` entries,
        one per song still available to the current user. ``variables`` is a
        1-D vector made of the first ``displayed_users_embedding_size`` user
        coordinates, the first ``displayed_items_embedding_size`` item
        coordinates and ``noise_size`` random values.
    """

    def __init__(self, nb_users, nb_items,
                 seed=None,
                 internal_embedding_size=3,
                 displayed_users_embedding_size=2,
                 displayed_items_embedding_size=2,
                 noise_size=2):
        """Store the configuration; embeddings are only created by ``reset()``.

        ``seed`` initialises the random generator. It stays in effect for a
        later ``reset()`` call unless that call supplies its own seed.
        """
        self.nb_users = nb_users
        self.nb_items = nb_items
        self.internal_embedding_size = internal_embedding_size
        self.displayed_users_embedding_size = displayed_users_embedding_size
        self.displayed_items_embedding_size = displayed_items_embedding_size
        self.noise_size = noise_size
        self._rng = np.random.RandomState(seed)

        self.action_size = self.nb_items
        # the episode ends once every (user, item) pair has been recommended
        self.sampling_limit = nb_users * nb_items
        self.users_embedding = None
        self.items_embedding = None
        self.user_item_history = None
        self.done = False

    def step(self, action_list):
        """Recommend songs to the current user and advance to the next user.

        Parameters
        ----------
        action_list : iterable of int
            Positions in the current user's list of available songs (the
            order of the state returned by the previous ``reset``/``step``),
            not raw item ids.

        Returns
        -------
        rewards : list of float
            Rating of each recommended song, in the order of ``action_list``.
        state : list
            ``[user, item, variables]`` entries for the next user; empty once
            the episode is over.
        done : bool
            True when every (user, item) pair has been recommended.
        optimal_return : float
            Highest rating among the songs that were available to the user
            before this step.

        Raises
        ------
        ValueError
            If called after the episode finished.
        """
        # check if behind done
        # The former code printed this message and then failed inside np.max
        # on an empty list; raise the same exception type with a clear message.
        if self.done:
            raise ValueError("You are calling step after it return done=True.\n"
                             "You should reset the environment.")

        # songs not yet recommended to the current user (computed once;
        # actions are positions in this array)
        unseen_items = np.flatnonzero(self.user_item_history[self.current_user, :] == 0)

        # compute potential rewards
        potential_rewards = [self._get_user_item_rating(self.current_user, i)
                             for i in unseen_items]

        # compute optimal reward
        optimal_return = np.max(potential_rewards)

        # map actions to items
        self.recommended_items = [unseen_items[action] for action in action_list]

        # mark items as rated
        self.user_item_history[self.current_user, self.recommended_items] = 1

        # compute reward R_t
        self.current_ratings = [self._get_user_item_rating(self.current_user, rec_item)
                                for rec_item in self.recommended_items]
        self.rewards = self.current_ratings

        # check if done
        if self.user_item_history.sum() == self.sampling_limit:
            self.done = True

        # compute next state S_{t+1}
        self._next_state()

        # update action space t+1
        self.action_size = len(self.available_items)

        return self.rewards, self.state, self.done, optimal_return

    def reset(self, seed=None):
        """Start a new episode and return the initial state.

        Draws fresh user/item embeddings with coordinates in
        {0.2, 0.4, 0.6, 0.8, 1.0}, clears the recommendation history and
        picks the first user.

        ``seed``, when given, re-seeds the random generator. When omitted,
        the existing generator (seeded in the constructor) keeps being used,
        so the constructor seed is no longer silently discarded.
        """
        if seed is not None:
            self._rng = np.random.RandomState(seed)
        self.action_size = self.nb_items

        # create users and items embedding matrix
        self.users_embedding = self._rng.randint(1, 6, size=(self.nb_users, self.internal_embedding_size)) / 5
        self.items_embedding = self._rng.randint(1, 6, size=(self.nb_items, self.internal_embedding_size)) / 5

        # initialize history matrix (1 = already recommended to that user)
        self.user_item_history = np.zeros((self.nb_users, self.nb_items))
        self.done = False

        self._next_state()
        return self.state

    def _get_user_item_rating(self, user, item):
        """Return the noise-free rating: dot product of the two embeddings."""
        return self.users_embedding[user].dot(self.items_embedding[item])

    def _get_variables(self, user, item):
        """Return the observable feature vector of a (user, item) pair.

        The result is always a 1-D array: displayed user coordinates,
        displayed item coordinates, then ``noise_size`` random values drawn
        from {0.2, ..., 1.0}. Concatenating (rather than stacking) keeps this
        well-defined when the two displayed sizes differ or when there is no
        noise.
        """
        parts = [self.users_embedding[user][:self.displayed_users_embedding_size],
                 self.items_embedding[item][:self.displayed_items_embedding_size]]

        if self.noise_size > 0:
            parts.append(self._rng.randint(1, 6, size=self.noise_size) / 5)

        return np.concatenate(parts)

    def _get_new_user(self):
        """Draw a random user who still has at least one song left.

        Uses rejection sampling; the caller must make sure such a user
        exists, otherwise the loop does not terminate.
        """
        user = self._rng.randint(0, self.nb_users)
        # get a new user who hasn't tried all the songs
        while np.sum(self.user_item_history[user, :]) == self.nb_items:
            user = self._rng.randint(0, self.nb_users)
        return user

    def _next_state(self):
        """Pick the next user and rebuild ``available_items`` and ``state``."""
        if self.user_item_history.sum() < self.sampling_limit:
            # Pick a user
            self.current_user = self._get_new_user()
            # list available items (column vector of item indices)
            self.available_items = np.argwhere(self.user_item_history[self.current_user, :] == 0)
        else:
            # nothing left to recommend: no current user, no available items
            self.current_user = None
            self.available_items = np.empty((0, 1), dtype=int)

        self.state = list()
        for i in self.available_items:
            item = i[0]
            # compute variables (part of the user, item embedding (with noise))
            variables = self._get_variables(self.current_user, item)
            self.state.append([self.current_user, item, variables])


In [16]:
# Size the simulated environment after the data set: one user / item per distinct id.
env = music_rec_env(
    nb_users=df['user_id'].nunique(),
    nb_items=df['item_id'].nunique(),
    seed=2020,
)

In [17]:
# Start an episode with a fixed seed and show the first 10 entries of the initial state.
env.reset(seed=2020)[:10]  # user, available_item, user_item_feature with noise

[[70, 0, array([0.8, 1. , 0.8, 0.4, 0.6, 0.2])],
 [70, 1, array([0.8, 1. , 1. , 0.4, 0.2, 0.4])],
 [70, 2, array([0.8, 1. , 0.4, 0.6, 0.8, 1. ])],
 [70, 3, array([0.8, 1. , 1. , 0.8, 0.4, 0.8])],
 [70, 4, array([0.8, 1. , 0.2, 0.4, 0.6, 1. ])],
 [70, 5, array([0.8, 1. , 0.4, 1. , 0.8, 0.8])],
 [70, 6, array([0.8, 1. , 1. , 0.8, 0.4, 1. ])],
 [70, 7, array([0.8, 1. , 0.6, 1. , 0.6, 0.2])],
 [70, 8, array([0.8, 1. , 0.8, 0.4, 0.2, 0.6])],
 [70, 9, array([0.8, 1. , 0.4, 0.8, 1. , 0.6])]]

User 70 is connecting to the platform.

Recommend three songs to this user from among all available songs.

For each user-item pair, we also observe a feature vector that depends on the user, the song, and/or some context (such as the occasion, the weather, or the user's mood).

In [18]:
# Actions are positions in the current user's available-item list, so np.arange(3)
# recommends the entries at positions 0, 1 and 2.
reward, next_state, done, optimal_return = env.step(np.arange(3)) # pick the first 3 available items in the list 
print('reward: ', reward)

reward:  [1.24, 2.2, 1.32]


In [19]:
# Highest single-song rating that was attainable for that user before the step above.
optimal_return

2.8