In [1]:
import pandas as pd
import numpy as np
import random
from collections import deque

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the song catalogue. Later cells read (at least) the columns
# artist_name, genre, track_name, track_id, popularity, key, mode and
# time_signature, and address songs by their row position in this frame.
# `pd.read_csv` is the public entry point; `pd.pandas` only resolves because
# the package happens to expose itself as an attribute.
all_songs = pd.read_csv('SpotifyFeatures.csv')

In [3]:
# setting configuration variables

# Number of songs sampled from the user's favourite genres/artists for a
# playlist while too few grades exist for the classifier.
RECOMMENDATIONS_SIZE = 17
# Number of random catalogue songs appended to each favourites-based playlist;
# a classifier-based playlist samples RECOMMENDATIONS_SIZE plus this many songs.
ADDITIONAL_RANDOM_SONGS_COUNT = 3
# How many favourite genres, and how many favourite artists, ask_favorites()
# requests from the user.
INITIAL_FAVORITE_COUNT = 3
# Share of RECOMMENDATIONS_SIZE kept for favourite-genre songs: favourite-artist
# songs are capped at round(RECOMMENDATIONS_SIZE * (1 - share)).
INITIAL_FAVORITE_GENRES_MIN_SHARE = 0.8
# Total number of likes + dislikes required before recommendations switch from
# favourites to the trained classifier.
MIN_GRADES_COUNT_FOR_PREDICTION = 100
# Maximum number of recently played song ids remembered per user.
LAST_LISTENED_QUEUE_SIZE = 50

In [4]:
def remove_duplicate_songs(id_lists, songs=None):
    """Remove duplicate songs from several id lists, in place.

    Two kinds of duplicates are dropped:

    * rows that share a ``track_id`` (the same track stored once per genre):
      only the row with the lowest id is kept;
    * an id that already survived in an earlier list is removed from every
      later list, so earlier lists take priority.

    The relative order of the surviving ids inside each list is preserved.

    Args:
        id_lists: list of mutable lists of integer song ids. The ids are used
            as row positions (``.iloc``) and compared against the resulting
            index labels, so ``songs`` is expected to carry a default
            0..n-1 index.
        songs: DataFrame with a ``track_id`` column. Defaults to the
            module-level ``all_songs`` catalogue.

    Returns:
        None; every list in ``id_lists`` is modified in place.
    """
    if songs is None:
        songs = all_songs

    # Sorting pins down which row of a duplicated track survives; iterating a
    # set directly would leave that choice to the set's internal ordering.
    all_ids = sorted({song_id for id_list in id_lists for song_id in id_list})

    # filtering out the same song but with different genre
    kept_ids = set(songs.iloc[all_ids].drop_duplicates(subset=['track_id']).index)

    # used to filter out the same song listed in different lists
    added_ids = set()

    for id_list in id_lists:
        id_list[:] = [song_id for song_id in id_list
                      if song_id in kept_ids and song_id not in added_ids]
        added_ids.update(id_list)

def prepare_song_dataset(dataframe):
    """Turn raw song rows into a purely numeric feature frame.

    The descriptive columns (artist_name, genre, track_name, track_id) are
    dropped. ``key`` and ``mode`` are replaced by integer codes numbered in
    order of first appearance in the module-level ``all_songs`` catalogue, and
    ``time_signature`` strings are reduced to the integer written before the
    first '/'; missing time signatures are left as they are.

    Args:
        dataframe: song rows with the same columns as ``all_songs`` (extra
            columns are passed through untouched).

    Returns:
        A new DataFrame; the argument is not modified.
    """
    def _leading_number(signature):
        return int(signature.split('/')[0])

    features = dataframe.drop(columns=['artist_name', 'genre', 'track_name', 'track_id'])

    # Codes always come from the full catalogue so they stay the same no
    # matter which subset of songs is being encoded.
    key_codes = {label: code for code, label in enumerate(all_songs['key'].unique())}
    mode_codes = {label: code for code, label in enumerate(all_songs['mode'].unique())}

    return features.assign(
        key=features['key'].map(key_codes),
        mode=features['mode'].map(mode_codes),
        time_signature=features['time_signature'].map(_leading_number, na_action='ignore'),
    )

class User:
    """A listener: favourites, song grades and the playlist currently played.

    Song ids are integers used both as labels of ``all_songs.index`` and as
    positions for ``all_songs.iloc``, so the catalogue is expected to carry a
    default 0..n-1 index.
    """

    def __init__(self, name):
        self.name = name

        # ids of the most recently played songs, oldest first
        self.last_listened_songs = []
        self.current_playlist = []
        # position inside current_playlist of the last played song (-1: none yet)
        self.last_listened_playlist_song = -1

        self.liked_song_ids = []
        self.disliked_song_ids = []

        self.favorite_genres = []
        self.favorite_artists = []

    def get_current_playlist(self):
        """Return the list of song ids of the current playlist."""
        return self.current_playlist

    def add_favorites(self, genres, artists):
        """Store the favourite genres and artists used before enough grades exist."""
        self.favorite_genres = genres
        self.favorite_artists = artists

    def get_grades_dataset(self):
        """Build the training frame from the graded songs.

        Returns:
            The liked rows followed by the disliked rows, passed through
            ``prepare_song_dataset``, with a ``liked`` column of 1 / 0.
        """
        liked_songs = all_songs.iloc[self.liked_song_ids].copy()
        disliked_songs = all_songs.iloc[self.disliked_song_ids].copy()

        liked_songs['liked'] = 1
        disliked_songs['liked'] = 0

        learning_dataset = pd.concat([liked_songs, disliked_songs])
        return prepare_song_dataset(learning_dataset)

    def update_recommendations(self):
        """Replace ``current_playlist`` with a freshly shuffled playlist.

        With fewer than MIN_GRADES_COUNT_FOR_PREDICTION grades the playlist is
        sampled from the favourite genres/artists; otherwise from the songs a
        decision tree trained on the grades predicts as liked. In both cases
        the playlist is topped up with random songs until it holds
        RECOMMENDATIONS_SIZE + ADDITIONAL_RANDOM_SONGS_COUNT entries (fewer
        only if the catalogue runs out of eligible songs). Recently played and
        disliked songs are never included.
        """
        grades_count = len(self.liked_song_ids) + len(self.disliked_song_ids)
        if grades_count < MIN_GRADES_COUNT_FOR_PREDICTION:
            playlist = self._recommend_from_favorites()
        else:
            playlist = self._recommend_from_predictions()

        missing = RECOMMENDATIONS_SIZE + ADDITIONAL_RANDOM_SONGS_COUNT - len(playlist)
        playlist.extend(self._pick_random_songs(missing, exclude=playlist))
        random.shuffle(playlist)

        # Always bind a new list object: the interface cell keeps a reference
        # to the previous playlist to detect that a new one was generated.
        self.current_playlist = playlist

    def _blocked_song_ids(self):
        """Return the ids that must not be recommended (recently played or disliked)."""
        return list(self.last_listened_songs) + list(self.disliked_song_ids)

    def _recommend_from_favorites(self):
        """Sample at most RECOMMENDATIONS_SIZE songs by favourite artist and genre."""
        allowed_filter = ~all_songs.index.isin(self._blocked_song_ids())
        genre_filter = all_songs.genre.isin(self.favorite_genres)
        artist_filter = all_songs.artist_name.isin(self.favorite_artists)

        to_recommend_by_genre = all_songs.index[genre_filter & allowed_filter].tolist()
        to_recommend_by_artist = all_songs.index[artist_filter & allowed_filter].tolist()

        remove_duplicate_songs([to_recommend_by_artist, to_recommend_by_genre])

        artist_count = min(
            len(to_recommend_by_artist),
            round(RECOMMENDATIONS_SIZE * (1 - INITIAL_FAVORITE_GENRES_MIN_SHARE)),
        )
        # random.sample raises ValueError when asked for more items than
        # exist, so the genre request is capped at what is available.
        genre_count = min(len(to_recommend_by_genre), RECOMMENDATIONS_SIZE - artist_count)

        recommendations_by_artist = random.sample(to_recommend_by_artist, artist_count)
        recommendations_by_genre = random.sample(to_recommend_by_genre, genre_count)
        return recommendations_by_genre + recommendations_by_artist

    def _recommend_from_predictions(self):
        """Sample songs that a decision tree trained on the grades predicts as liked."""
        learning_dataset = self.get_grades_dataset()

        X_train = learning_dataset.drop('liked', axis=1)
        y_train = learning_dataset['liked']

        model = DecisionTreeClassifier()
        model.fit(X_train, y_train)

        # prepare_song_dataset returns a new frame, so the catalogue itself
        # can be passed in without copying it first.
        will_like = model.predict(prepare_song_dataset(all_songs)) == 1
        allowed_filter = ~all_songs.index.isin(self._blocked_song_ids())

        to_recommend = all_songs.index[will_like & allowed_filter].tolist()
        wanted = RECOMMENDATIONS_SIZE + ADDITIONAL_RANDOM_SONGS_COUNT
        return random.sample(to_recommend, min(len(to_recommend), wanted))

    def _pick_random_songs(self, count, exclude=()):
        """Return up to ``count`` distinct random song ids.

        Recently played songs, disliked songs and the ids in ``exclude`` are
        skipped. Fewer ids are returned when the catalogue has no more
        eligible songs.
        """
        catalogue_size = len(all_songs)
        if count <= 0 or catalogue_size == 0:
            return []

        blocked = set(self._blocked_song_ids()) | set(exclude)
        picked = []

        # Rejection sampling is cheap on a large catalogue; the attempt limit
        # keeps a small or mostly blocked catalogue from looping forever.
        for _ in range(10 * count + 100):
            if len(picked) == count:
                break
            candidate = random.randrange(catalogue_size)
            if candidate not in blocked:
                picked.append(candidate)
                blocked.add(candidate)

        if len(picked) < count:
            remaining = [song_id for song_id in range(catalogue_size) if song_id not in blocked]
            picked.extend(random.sample(remaining, min(len(remaining), count - len(picked))))

        return picked

    def listen(self, song_id):
        """Record ``song_id`` as played, keeping only the newest LAST_LISTENED_QUEUE_SIZE ids."""
        self.last_listened_songs.append(song_id)

        overflow = len(self.last_listened_songs) - LAST_LISTENED_QUEUE_SIZE
        if overflow > 0:
            del self.last_listened_songs[:overflow]

    def add_grade(self, song_id, liked):
        """Store a like (``liked`` truthy) or a dislike for ``song_id``.

        A new grade replaces an earlier one for the same song, so a song never
        ends up in both lists or twice in one list, which would feed
        contradictory or repeated rows to the classifier.
        """
        if liked:
            target, other = self.liked_song_ids, self.disliked_song_ids
        else:
            target, other = self.disliked_song_ids, self.liked_song_ids

        if song_id in other:
            other.remove(song_id)
        if song_id not in target:
            target.append(song_id)

    def get_next_recommended_song(self):
        """Return the id of the next song to play without advancing the position.

        A new playlist is generated first when the current one is exhausted
        (or empty).

        Raises:
            IndexError: if no song at all can be recommended.
        """
        if self.last_listened_playlist_song + 1 >= len(self.current_playlist):
            self.update_recommendations()
            self.last_listened_playlist_song = -1

        if not self.current_playlist:
            raise IndexError('No songs available to recommend')

        return self.current_playlist[self.last_listened_playlist_song + 1]

    def go_to_next_playlist_song(self):
        """Mark the song returned by ``get_next_recommended_song`` as played."""
        self.last_listened_playlist_song += 1


In [5]:
# program interface

import time

def _ask_choices(valid_values, count, error_message):
    """Read ``count`` distinct entries from ``input()``, each contained in ``valid_values``.

    An entry is tried exactly as typed first and then with surrounding
    whitespace stripped. Unknown entries print ``error_message``, repeated
    entries print a notice; in both cases the user is asked again.
    """
    valid = set(valid_values)
    chosen = []

    while len(chosen) < count:
        answer = input()
        if answer not in valid:
            answer = answer.strip()

        if answer not in valid:
            print(error_message)
        elif answer in chosen:
            print(f'{answer} is already chosen')
        else:
            chosen.append(answer)

    return chosen


def ask_favorites():
    """Interactively ask for the user's favourite genres and artists.

    All genres and the first 50 artists are displayed, both ordered by the
    popularity of their most popular song, and INITIAL_FAVORITE_COUNT valid,
    distinct names are read for each.

    Returns:
        dict with the keys 'genres' and 'artists', each a list of names.
    """
    songs_by_popularity = all_songs.sort_values('popularity', ascending=False)
    all_genres = songs_by_popularity.genre.unique()
    all_artists = songs_by_popularity.artist_name.unique()

    display(pd.DataFrame(all_genres, columns=['Genres']))
    print(f'Choose {INITIAL_FAVORITE_COUNT} favorite genres (type in its name):')
    # presumably lets the notebook render the output above before the input
    # box appears -- TODO confirm
    time.sleep(0.2)
    genres = _ask_choices(all_genres, INITIAL_FAVORITE_COUNT, 'Invalid genre')

    print()
    display(pd.DataFrame(all_artists, columns=['Artists']).head(50))

    print(f'Choose {INITIAL_FAVORITE_COUNT} favorite artists (type in its name):')
    time.sleep(0.2)
    artists = _ask_choices(all_artists, INITIAL_FAVORITE_COUNT, 'Invalid artist')

    print()
    return {
        'genres': genres,
        'artists': artists
    }

def ask_grade():
    """Ask the user to grade the current song until a valid answer is given.

    Returns:
        int: 0 (skip), 1 (like) or 2 (dislike).

    Only ``ValueError`` from parsing the answer is handled; anything else
    raised by ``input()`` (for example ``EOFError`` on a closed stdin)
    propagates instead of restarting the prompt forever.
    """
    while True:
        print('Please, grade listened song: ')
        print('0: Skip')
        print('1: Like')
        print('2: Dislike')
        # presumably lets the notebook render the prompt before the input box
        # appears -- TODO confirm
        time.sleep(0.2)

        try:
            grade = int(input())
        except ValueError as e:
            print(e)
            continue

        # Any other integer would silently be treated as "skip" by the caller.
        if grade not in (0, 1, 2):
            print(f'Invalid grade: {grade}')
            continue

        print()
        return grade

def print_playlist(playlist):
    """Display artist and track name of every song in ``playlist`` without the index column.

    Args:
        playlist: list of song ids (row positions in ``all_songs``).
    """
    styler = all_songs.iloc[playlist][['artist_name', 'track_name']].style

    # Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in
    # favour of Styler.hide(axis='index'); keep the old call as a fallback.
    if hasattr(styler, 'hide'):
        styler = styler.hide(axis='index')
    else:
        styler = styler.hide_index()

    display(styler)
    

In [None]:
# Interactive session: collect favourites, then play and grade 100 songs.
user = User('Arsen')
favorites = ask_favorites()
user.add_favorites(genres=favorites['genres'], artists=favorites['artists'])

last_playlist = user.get_current_playlist()

for i in range(100):
    current_song = user.get_next_recommended_song()

    # Show the playlist whenever fetching the next song produced a new one.
    playlist = user.get_current_playlist()
    if playlist != last_playlist:
        print('Your current playlist:')
        print_playlist(playlist)
        last_playlist = playlist

    song_row = all_songs.iloc[current_song]
    artist, track_name, genre = song_row['artist_name'], song_row['track_name'], song_row['genre']

    print(f'Current track: {artist} - {track_name} ({genre})')
    user.listen(current_song)

    # 1 = like, 2 = dislike; any other answer leaves the song ungraded.
    grade = ask_grade()
    if grade in (1, 2):
        user.add_grade(current_song, liked=(grade == 1))

    user.go_to_next_playlist_song()

Unnamed: 0,Genres
0,Dance
1,Pop
2,Rap
3,Hip-Hop
4,Reggaeton
5,Indie
6,Electronic
7,Rock
8,R&B
9,Children’s Music


Choose 3 favorite genres (type in its name):
Pop
Rap
Dance



Unnamed: 0,Artists
0,Ariana Grande
1,Post Malone
2,Daddy Yankee
3,Ava Max
4,Pedro Capó
5,Halsey
6,Marshmello
7,Sam Smith
8,DJ Snake
9,Lady Gaga


Choose 3 favorite artists (type in its name):
Ed Sheeran
Bruno Mars
Maroon 5

Your current playlist:


artist_name,track_name
Nashville Cast,Swept Away
Hot Chip,Over And Over
Kim Petras,Close Your Eyes
Eric B. & Rakim,Don't Sweat The Technique
Ed Sheeran,Bibia Be Ye Ye
Kamaiyah,Fuck It up (feat. Yg)
$uicideBoy$,275 $uicide
Nelly,Pimp Juice
Maroon 5,Whiskey (feat. A$AP Rocky)
Trey Songz,SmartPhones


Current track: Nashville Cast - Swept Away (Country)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
2

Current track: Hot Chip - Over And Over (Dance)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
2

Current track: Kim Petras - Close Your Eyes (Indie)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
2

Current track: Eric B. & Rakim - Don't Sweat The Technique (Rap)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
2

Current track: Ed Sheeran - Bibia Be Ye Ye (Pop)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
1

Current track: Kamaiyah - Fuck It up (feat. Yg) (Rap)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
1

Current track: $uicideBoy$ - 275 $uicide (Hip-Hop)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
2

Current track: Nelly - Pimp Juice (Dance)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
1

Current track: Maroon 5 - Whiskey (feat. A$AP Rocky) (Pop)
Please, grade listened song: 
0: Skip
1: Like
2

artist_name,track_name
Martin Lawrence,Driving While High
Ed Sheeran,Candle In The Wind - 2018 Version
Pennywise,Fuck Authority
Coast Modern,Guru
Joji,TEST DRIVE
Mark Ronson,"Ooh Wee (feat. Ghostface Killah, Nate Dogg, Trife & Saigon)"
Tribal Seeds,Right On Time
Jason Mraz,I'm Yours
Jon Bellion,All Time Low
Westlife,Uptown Girl - Radio Edit


Current track: Martin Lawrence - Driving While High (Comedy)
Please, grade listened song: 
0: Skip
1: Like
2: Dislike
