In [190]:
import pylast
import pandas as pd
import numpy as np

API_KEY = '42d5a344a079b31fdf1e25696a8bd517'
API_SECRET = '02bcec5c2ab24babcc5f5cba374276fb'

network = pylast.LastFMNetwork(api_key=API_KEY, api_secret=API_SECRET)

In [205]:
#Generate a list of users using their friends 
def make_friends_list(initial_user, min_length=1000, friends_list=[], initial_users_list=[]):
    
    #Add initial user to friends list
    friends_list.append(initial_user)
    #Add initial user to list of user "seeds"
    initial_users_list.append(initial_user)
    #Get user's friends
    user = network.get_user(initial_user)
    user_friends = user.get_friends(limit=None)

    #Add friends to user list 
    for i in range(len(user_friends)):
        #Make sure they're not already in the list
        if user_friends[i].get_name() not in friends_list:
            friends_list.append(user_friends[i].get_name()) 
    
    #Find another user whose friends can be added to list
    for i in range(len(friends_list)):
        #Make sure they haven't already been used as a seed
        if friends_list[i] not in initial_users_list:
            new_user = friends_list[i]
    
    #Look at new user's friend's list until list is sufficiently long
    while len(friends_list) < min_length:
        list = make_friends_list(initial_user=new_user, min_length=min_length, friends_list=friends_list, 
                                 initial_users_list=initial_users_list)
    
    #Remove duplicates
    return set(friends_list)

In [206]:
#Create a DataFrame with users are rows, artists as columns, and play counts as values
def artist_plays_from_users(users_list=users_list):

    artist_plays_list = []
    users_list = list(users_list)

    #Loop through users
    for user_name in users_list:
        #Get the user's top artists
        user = network.get_user(user_name)
        user_artists = user.get_top_artists()
        
        #For each user and each artist they listen to, create a list containing the username, artist, and play count
        for i in range(len(user_artists)):
            artist_name = user_artists[i]._asdict()['item'].get_name()
            num_plays = int(user_artists[i]._asdict()['weight'])
            artist_list = [user_name, artist_name, num_plays]
            #Create a list of lists containing this info
            artist_plays_list.append(artist_list)
            
    #Create DataFrame 
    df = pd.DataFrame(artist_plays_list)
    df.columns = ['user', 'artist', 'plays']
    artist_plays = df.pivot(index='user', columns='artist', values='plays').fillna(0)
            
    return artist_plays

In [207]:
from sklearn.neighbors import NearestNeighbors

#Use NearestNeighbors model to find similar users
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')

In [208]:
def get_similar_users(user, artist_plays=artist_plays, num_users=10):
    
    users_list = list(artist_plays.index)
    
    #Make sure user data is available
    if user in users_list:
        #Fit model and find distances and indices of (num_users+1) nearest neighbors
        model_knn.fit(artist_plays)
        distances, indices = model_knn.kneighbors(artist_plays, n_neighbors=num_users+1)
        #Calculate index of user
        user_index = users_list.index(user)
        
        #Create list of similar users
        similar_users_indices = indices[user_index]
        similar_users = [users_list[i] for i in similar_users_indices]
        
        #Create list of distances
        user_distances = distances[user_index]
        #Convert cosine distance to similarity score
        similarity_scores = 1 - user_distances
        
        #Exclude original user 
        return similar_users[1:], similarity_scores[1:]
    
    else:
        print("User not in list!")

In [209]:
import itertools

def get_artist_recommendations(user, artist_plays=artist_plays, num_comparisons=20, num_recommendations=10):
    
    #Get similar users and similarity scores
    users_list = make_friends_list(user)
    artist_plays = artist_plays_from_users(users_list)
    similar_users, similarity_scores = get_similar_users(user, artist_plays=artist_plays, num_users=num_comparisons)
    
    artist_list = []
    user_artist_list = []
    recommended_artists = []
    
    #Get list of top artists from all similar users
    for user_name in similar_users:
        current_user = network.get_user(user_name)
        user_artists = current_user.get_top_artists()
        artist_names = [user_artists[i]._asdict()['item'].get_name() for i in range(len(user_artists))]
        artist_list.append(artist_names)
        
    #Flatten the list and remove duplicates
    flattened_artist_list = list(itertools.chain.from_iterable(artist_list))
    artist_columns = list(set(flattened_artist_list))
    
    #Create a dataframe of similar users and play counts for their top artists
    artist_plays = artist_plays.loc[similar_users, artist_columns]
    #Add a column of similarity scores for each user
    artist_plays['score'] = similarity_scores
    #Multiply play counts by similarity score 
    weighted_plays = artist_plays.drop('score', axis=1).multiply(artist_plays['score'], axis='index')
    #Sum weighted scores for each artist
    weighted_totals = weighted_plays.sum()
    #Calculate sum of similarity scores for the users who have listened to each artist, to correct for
    #more popular artists having a big advantage
    booleandf = artist_plays.drop('score', axis=1) != 0
    similarity_df = booleandf.multiply(artist_plays['score'], axis='index')
    similarity_sums = similarity_df.sum()
    #For each artist, divide sum of weighted scores by sum of similarity scores
    artist_rankings = weighted_totals / similarity_sums
    #Rank the artists in ascending score
    artist_rankings = artist_rankings.sort_values(ascending=False)
    
    #Get top artists of original user
    user = network.get_user(user)
    user_artists = user.get_top_artists()
    
    #Create list of top artists of original user
    for i in range(len(user_artists)):
        artist = user_artists[i]._asdict()['item'].get_name()
        user_artist_list.append(artist)
    
    #Go through each possible recommended artist
    for i in range(len(artist_rankings)):    
        #Add to recommended artists list if not in user's top artists
        if artist_rankings.index[i] not in user_artist_list:
            recommended_artists.append(artist_rankings.index[i])
        #Stop when we have the desired number of recommendations
        if len(recommended_artists) == num_recommendations:
            break
        
    return recommended_artists

In [213]:
get_artist_recommendations('jonhdee23')

['Rasputina',
 'Lalleshwari',
 'The Dead Weather',
 'Eths',
 'Mother Mother',
 'Soap&Skin',
 'Set Fire to Flames',
 'Archive',
 'EZ3kiel',
 'The Blood Brothers']

In [214]:
get_artist_recommendations('OutThisLife')

['Queens of the Stone Age',
 'BROCKHAMPTON',
 'Wicca Phase Springs Eternal',
 'Vic Mensa',
 'Kyuss',
 'Madlib',
 'J Dilla',
 'Car Seat Headrest',
 'TTNG',
 'Carly Rae Jepsen']

In [215]:
get_artist_recommendations('azekulic')

['The Legendary Pink Dots',
 'Depeche Mode',
 'The Smiths',
 'Tom Waits',
 'Masada',
 'Cocteau Twins',
 'Brian Eno',
 'Sade',
 'The Beatles',
 'Nine Inch Nails']