In [248]:
import pandas as pd
import numpy as np
import math
import random
import pickle
from IPython.display import display
# Single source of truth for every seed used in the notebook.
RANDOM_STATE = 123
# Seed both generators imported above so stochastic steps are reproducible.
random.seed(RANDOM_STATE)
np.random.seed(RANDOM_STATE)

# Final System

## Load data

In [2]:
# Gym member profiles (one row per member).
df_gym = pd.read_csv('data/modified_gym_members_exercise_tracking.csv')
# Heart-rate readings; filtered later by 'User_ID' and read through the 'Heart_Rate' column.
df_heart_rates = pd.read_csv('data/gym_members_heart_rates.csv')
# Listening history; used below through the 'user_id', 'track_id' and 'playcount' columns.
df_users = pd.read_csv('data/User Listening History_modified.csv')
# Track metadata; the columns needed by the recommender are selected in the next cell.
df_music = pd.read_csv('data/Million Song Dataset kaggle/Music Info.csv')

In [3]:
# Keep only the track columns the recommenders and the final system read.
df_music_info = df_music[['track_id', 'name', 'artist', 'energy', 'duration_ms']]

In [4]:
# Series built from the first data column of the CSV, indexed by the file's first column.
# It is used below as a track_id -> cluster lookup (presumably produced by a separate clustering notebook).
id_to_cluster = pd.read_csv('data/track_clusters.csv', index_col=0).iloc[:, 0]

## Stage 1 - Energy Calculator

In [5]:
import skfuzzy as fuzz
from skfuzzy import control as ctrl

In [6]:
class FuzzyController:
    """Fuzzy-logic controller that turns heart-rate readings into a music intensity.

    Antecedents:
        * ``'BPM'`` -- universe 30..200 (step 1) with the terms
          'Very Low', 'Low', 'Medium', 'High' and 'Very High'.
        * ``'BPM Variation'`` -- universe -30..30 (step 1) with the terms
          'Negative', 'Zero' and 'Positive'.

    Consequent:
        * ``'Intensity'`` -- universe 0..1 (step 0.01) with the terms
          'Low', 'Medium' and 'High'.

    The rule base is inverse: low or falling heart rates map to high
    intensity, high or rising heart rates map to low intensity.
    """

    def __init__(self):
        """Build the fuzzy variables, the three rules and the simulation object."""
        self.bpm_antecedent = ctrl.Antecedent(np.arange(30, 201, 1), 'BPM')
        self.bpm_variation_antecedent = ctrl.Antecedent(np.arange(-30, 31, 1), 'BPM Variation')
        self.intensity_consequent = ctrl.Consequent(np.arange(0, 1.01, 0.01), 'Intensity')

        self.bpm_antecedent.automf(5, names=['Very Low', 'Low', 'Medium', 'High', 'Very High'])
        self.bpm_variation_antecedent.automf(3, names=['Negative', 'Zero', 'Positive'])
        self.intensity_consequent.automf(3, names=['Low', 'Medium', 'High'])

        bpm = self.bpm_antecedent
        variation = self.bpm_variation_antecedent
        intensity = self.intensity_consequent

        # Rule 1: heart rate is low (or dropping from a medium level) -> high intensity.
        rule1 = ctrl.Rule(
            antecedent=(bpm['Very Low'] |
                        (bpm['Low'] & variation['Negative']) |
                        (bpm['Low'] & variation['Zero']) |
                        (bpm['Medium'] & variation['Negative'])),
            consequent=intensity['High'])

        # Rule 2: transitional situations -> medium intensity.
        rule2 = ctrl.Rule(
            antecedent=((bpm['Low'] & variation['Positive']) |
                        (bpm['Medium'] & variation['Zero']) |
                        (bpm['High'] & variation['Negative'])),
            consequent=intensity['Medium'])

        # Rule 3: heart rate is high (or rising from a medium level) -> low intensity.
        # 'Very High' is included unconditionally, mirroring 'Very Low' in rule 1.
        # Without it no rule references that term, so a reading at the top of the
        # BPM universe activates nothing and compute() cannot produce a crisp output.
        rule3 = ctrl.Rule(
            antecedent=(bpm['Very High'] |
                        (bpm['Medium'] & variation['Positive']) |
                        (bpm['High'] & variation['Zero']) |
                        (bpm['High'] & variation['Positive'])),
            consequent=intensity['Low'])

        intensity_ctrl = ctrl.ControlSystem([rule1, rule2, rule3])
        self.intensity_sim = ctrl.ControlSystemSimulation(intensity_ctrl)

    def calculate_intensity(self, bpm, bpm_variation, plot_consequent=False, plot_antecedent=False):
        """Run the fuzzy system for one reading and return the crisp intensity.

        Args:
            bpm: Current heart rate, fed to the 'BPM' input.
            bpm_variation: Heart-rate change, fed to the 'BPM Variation' input.
            plot_consequent: If True, plot the output variable with the simulation state.
            plot_antecedent: If True, plot both input variables with the simulation state.

        Returns:
            The value of the 'Intensity' output after ``compute()``.
        """
        self.intensity_sim.input['BPM'] = bpm
        self.intensity_sim.input['BPM Variation'] = bpm_variation
        self.intensity_sim.compute()

        if plot_consequent:
            self.intensity_consequent.view(sim=self.intensity_sim)
        if plot_antecedent:
            self.bpm_antecedent.view(sim=self.intensity_sim)
            self.bpm_variation_antecedent.view(sim=self.intensity_sim)

        return self.intensity_sim.output['Intensity']

    def view_bpm_antecedent(self):
        """Plot the membership functions of the 'BPM' input."""
        self.bpm_antecedent.view()

    def view_bpm_variation_antecedent(self):
        """Plot the membership functions of the 'BPM Variation' input."""
        self.bpm_variation_antecedent.view()

    def view_intensity_consequent(self):
        """Plot the membership functions of the 'Intensity' output."""
        self.intensity_consequent.view()


In [None]:
class EnergyCalculator:
    """Stage 1: derives the target song energy from a client's heart-rate trace.

    The calculator keeps a session clock (``sesion_minute``) that is used as an
    index into ``df_heart_rates`` and is advanced by the duration of every song
    played.
    """

    def __init__(self, df_gym_member, df_heart_rates, fuzzy_controller=None):
        """Store the client data and the controller (a new FuzzyController if none is given)."""
        self.df_gym_member = df_gym_member
        self.df_heart_rates = df_heart_rates
        self.sesion_minute = 0
        self.fuzzy_controller = FuzzyController() if fuzzy_controller is None else fuzzy_controller

    def calculate_energy(self, plot_consequent=False, plot_antecedent=False):
        """Return the target energy for the current session minute.

        Returns 0.6 at minute 0 (default for the first song), -1 once the clock
        has run past the heart-rate readings, and otherwise the controller's
        intensity for the current reading and its change since the previous minute.
        """
        minute = self.sesion_minute
        if minute == 0:
            # No previous reading to compare against yet.
            return 0.6
        readings = self.df_heart_rates
        if minute >= len(readings):
            # Sentinel: the session is over.
            return -1
        bpm_now = readings[minute]
        bpm_delta = bpm_now - readings[minute - 1]
        return self.fuzzy_controller.calculate_intensity(bpm_now, bpm_delta, plot_consequent, plot_antecedent)

    def pass_song_duration(self, song_duration=2):
        """Advance the session clock by ``song_duration`` minutes.

        Returns the new session minute, or -1 if the clock is now past the
        last heart-rate reading.
        """
        self.sesion_minute += song_duration
        session_over = self.sesion_minute >= len(self.df_heart_rates)
        return -1 if session_over else self.sesion_minute

    def get_session_minute(self):
        """Return the current session minute."""
        return self.sesion_minute

    def view_bpm_antecedent(self):
        """Plot the controller's BPM input variable."""
        self.fuzzy_controller.view_bpm_antecedent()

    def view_bpm_variation_antecedent(self):
        """Plot the controller's BPM-variation input variable."""
        self.fuzzy_controller.view_bpm_variation_antecedent()

    def view_intensity_consequent(self):
        """Plot the controller's intensity output variable."""
        self.fuzzy_controller.view_intensity_consequent()

## Stage 2 - Recommendation System

### Interaction matrix

In [62]:
from scipy.sparse import csr_matrix, csc_matrix

In [63]:
# Per-user maximum playcount (columns: user_id, max_playcount).
df_users_agg = (
    df_users.groupby('user_id')['playcount']
    .agg(max_playcount='max')
    .reset_index()
)

# Attach each user's maximum to every history row. `transform` keeps the rows in
# exactly the order (and index) of `df_users`, so `df_users_rating` stays aligned
# row-for-row with the factorized codes computed from `df_users` below.
df_users_rating = df_users.assign(
    max_playcount=df_users.groupby('user_id')['playcount'].transform('max')
)

# Playcount normalised by the user's own maximum.
df_users_rating['rating'] = df_users_rating['playcount'] / df_users_rating['max_playcount']

# Integer codes for users and tracks; `*_uniques[code]` maps a code back to its id.
user_codes, user_uniques = pd.factorize(df_users['user_id'])
track_codes, track_uniques = pd.factorize(df_users['track_id'])

assert len(df_users_rating) == len(user_codes) == len(track_codes), "rating rows must align with factorized codes"

In [64]:
# Confidence scaling factor. Value proposed in "Collaborative Filtering for
# Implicit Feedback Datasets" by Hu, Koren and Volinsky (2008).
ALPHA = 40

# Confidence of each observed (user, track) pair: 1 + ALPHA * playcount.
confidence_values = ALPHA * df_users_rating['playcount'] + 1

# Sparse users x tracks matrix holding the confidence values.
matrix_shape = (len(user_uniques), len(track_uniques))
interaction_matrix_user_item = csr_matrix(
    (confidence_values, (user_codes, track_codes)),
    shape=matrix_shape,
)

### Matrix Factorization: Alternating Least Squares (ALS)

In [65]:
from implicit.als import AlternatingLeastSquares

In [66]:
class ALSRecommender:
    """Collaborative-filtering recommender backed by an implicit-feedback ALS model.

    ``recommendations`` is a list of tuples
    ``(track_id, energy, similarity, has_been_recommended)`` filled by
    ``make_recommendations`` and consumed song by song by ``recommend_song``.
    """

    def __init__(self, interaction_matrix, track_uniques, df_music_info, als_model=None):
        """Store the data and either reuse ``als_model`` or fit a new ALS model.

        Args:
            interaction_matrix: Sparse user x track matrix the model is fitted on / queried with.
            track_uniques: Index-like mapping a track code (matrix column) to its track_id.
            df_music_info: DataFrame with at least 'track_id' and 'energy' columns.
            als_model: Optional already-fitted model; when None a new one is trained.
        """
        self.interaction_matrix = interaction_matrix
        self.track_uniques = track_uniques
        self.df_music_info = df_music_info

        if als_model is None:
            self.als_model = AlternatingLeastSquares(factors=50, regularization=0.1, iterations=20, num_threads=-1, random_state=RANDOM_STATE)
            self.als_model.fit(self.interaction_matrix)
        else:
            self.als_model = als_model

        self.user_index = None
        self.recommendations = None # List of tuples (track_id, energy, similarity, has been recommended)

    def make_recommendations(self, user_index, n=100):
        """Compute and store the top ``n`` tracks for ``user_index``.

        Tracks the user already interacted with are filtered out.

        Returns:
            The stored list of ``(track_id, energy, similarity, False)`` tuples,
            in the order returned by the model.
        """
        self.user_index = user_index

        user_items = self.interaction_matrix.tocsr()[user_index]

        top_n_recommendations_indexes, top_n_recommendations_scores = self.als_model.recommend(user_index, user_items, N=n, filter_already_liked_items=True)

        track_ids = self.track_uniques[top_n_recommendations_indexes].tolist()

        # Look up the energy of every recommended track, preserving the model's order.
        df_filtered = self.df_music_info.set_index('track_id').loc[track_ids][['energy']].reset_index()

        self.recommendations = [
            (track_id, energy, similarity, False)
            for (track_id, energy), similarity in zip(df_filtered.itertuples(index=False, name=None), top_n_recommendations_scores)
        ]

        return self.recommendations

    def recommend_song(self, energy, energy_margin=0.05):
        """Pick the next not-yet-recommended track for the requested energy.

        The first unused track (in ranking order) whose energy lies within
        ``energy_margin`` of ``energy`` wins; if none qualifies, the unused
        track with the closest energy is chosen. The chosen track is flagged
        as recommended.

        Returns:
            ``(track_id, track_energy)`` of the chosen track.

        Raises:
            ValueError: If ``make_recommendations`` has not been called, or
                every stored track has already been recommended.
        """
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")

        closest_track_index = None
        distance_to_energy = float('inf')

        for i, (track_id, track_energy, similarity, has_been_recommended) in enumerate(self.recommendations):
            if has_been_recommended:
                continue

            distance = abs(track_energy - energy)

            if distance <= energy_margin:
                self.recommendations[i] = (track_id, track_energy, similarity, True)
                return (track_id, track_energy)

            if distance < distance_to_energy:
                closest_track_index = i
                distance_to_energy = distance

        if closest_track_index is not None:
            # Unpack the chosen entry's own similarity; the loop variable would
            # hold the score of the last track iterated, not of this one.
            track_id, track_energy, closest_similarity, _ = self.recommendations[closest_track_index]
            self.recommendations[closest_track_index] = (track_id, track_energy, closest_similarity, True)
            return (track_id, track_energy)

        raise ValueError("All recommendations have already been recommended")

    def get_recommendations(self):
        """Return the stored recommendation tuples (ValueError if none exist yet)."""
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")
        return self.recommendations

    def get_recommendations_ids(self):
        """Return the track ids of the stored recommendations, in ranking order."""
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")
        return [track_id for track_id, _, _, _ in self.recommendations]

    def get_recommendations_info(self):
        """Return the ``df_music_info`` rows of the stored recommendations, in ranking order."""
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")
        track_ids = [track_id for track_id, _, _, _ in self.recommendations]
        df_ordered = self.df_music_info.set_index('track_id').loc[track_ids].reset_index()
        return df_ordered

### K-means content-based filtering

In [67]:
class KmeansContentBasedRecommender: #For user history
    """Content-based profile: the share of a user's history falling in each track cluster."""

    def __init__(self, id_to_cluster):
        """Store the track_id -> cluster lookup.

        Args:
            id_to_cluster: Series indexed by track_id whose values are cluster labels.
        """
        self.id_to_cluster = id_to_cluster
        self.recommendations = None  # kept for backward compatibility; not used by this class
        # Initialised here so the getter can report "not computed yet" instead of
        # failing with AttributeError.
        self.recommended_cluster = None

    def make_cluster_recommendation(self, user_history):
        """Compute, store and return the cluster distribution of ``user_history``.

        Args:
            user_history: Track ids the user listened to (labels of ``id_to_cluster``).

        Returns:
            Series indexed by cluster label with the fraction of history tracks
            that belong to each cluster.
        """
        clusters = self.id_to_cluster[user_history]
        cluster_counts = clusters.value_counts()
        self.recommended_cluster = cluster_counts / len(clusters)
        return self.recommended_cluster

    def get_recommended_cluster(self):
        """Return the last computed cluster distribution.

        Raises:
            ValueError: If ``make_cluster_recommendation`` has not been called yet.
        """
        if self.recommended_cluster is None:
            raise ValueError("No cluster recommendation available. Please call make_cluster_recommendation first.")
        return self.recommended_cluster
    

### Hybrid recommender

In [68]:
class HybridRecommender:
    """Stage 2: ALS collaborative scores re-weighted by the user's cluster preferences.

    ``recommendations`` is a list of tuples
    ``(track_id, energy, similarity, has_been_recommended)``.
    """

    def __init__(self, interaction_matrix, track_uniques, df_music_info, df_users, id_to_cluster, als_recommender = None, content_based_recommender = None, alpha = 1):
        """Wire up the two underlying recommenders, creating any that is not supplied.

        Args:
            interaction_matrix: Passed to a new ALSRecommender when none is given.
            track_uniques: Passed to a new ALSRecommender when none is given.
            df_music_info: Track metadata with a 'track_id' column.
            df_users: Listening history with 'user_id' and 'track_id' columns.
            id_to_cluster: Series mapping track_id to cluster label.
            als_recommender: Optional pre-built collaborative recommender.
            content_based_recommender: Optional pre-built content-based recommender.
            alpha: Multiplier applied to every re-weighted similarity.
        """
        if als_recommender is not None:
            self.collaborative_als_recommender = als_recommender
        else:
            self.collaborative_als_recommender = ALSRecommender(interaction_matrix, track_uniques, df_music_info)

        if content_based_recommender is not None:
            self.content_based_recommender = content_based_recommender
        else:
            self.content_based_recommender = KmeansContentBasedRecommender(id_to_cluster)

        self.df_music_info = df_music_info
        self.df_users = df_users
        self.id_to_cluster = id_to_cluster
        self.alpha = alpha  # Alpha is a parameter to control the influence of content-based recommendations
        self.recommendations = None # List of tuples (track_id, energy, similarity, has been recommended)

    def make_recommendations(self, user_index, n=100):
        """Build the hybrid ranking for ``user_index`` and store it in ``recommendations``.

        Each collaborative similarity is multiplied by the share of the user's
        history that falls in the track's cluster (0.01 when the user never
        listened to that cluster) and by ``alpha``; the list is then sorted by
        the new similarity, highest first.
        """
        # Use the instance's history, not a notebook-level global, so the class
        # works with whatever data it was constructed with.
        user_id = self.df_users['user_id'].unique()[user_index]
        user_history = self.df_users[self.df_users['user_id'] == user_id]['track_id']
        collaborative_recomendations = self.collaborative_als_recommender.make_recommendations(user_index, n)
        content_based_cluster_recommendation = self.content_based_recommender.make_cluster_recommendation(user_history)

        reweighted = []
        for track_id, energy, similarity, has_been_recommended in collaborative_recomendations:
            cluster_presence = 0.01 #Default multiplier. Used if the song's cluster is not in the user's cluster preferences (content-based recommendation)
            song_cluster = self.id_to_cluster[track_id]
            if song_cluster in content_based_cluster_recommendation.index:
                cluster_presence = content_based_cluster_recommendation[song_cluster]

            reweighted.append((track_id, energy, similarity * cluster_presence * self.alpha, has_been_recommended))

        self.recommendations = sorted(reweighted, key=lambda x: x[2], reverse=True)  # Sort new similarity

    def make_recommendations_only_collaborative(self, user_index, n=100):
        """Store the plain ALS ranking for ``user_index`` (no content-based re-weighting)."""
        self.recommendations = self.collaborative_als_recommender.make_recommendations(user_index, n)

    def recommend_song(self, energy, energy_margin=0.05):
        """Pick the next not-yet-recommended track for the requested energy.

        Selection runs over this recommender's own ``recommendations`` list so
        that the hybrid ranking actually decides which track is served. The
        first unused track within ``energy_margin`` of ``energy`` wins;
        otherwise the unused track with the closest energy is chosen. The
        chosen track is flagged as recommended.

        Returns:
            ``(track_id, track_energy)`` of the chosen track.

        Raises:
            ValueError: If no recommendations were made yet, or all of them
                have already been recommended.
        """
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")

        closest_index = None
        closest_distance = float('inf')

        for i, (track_id, track_energy, similarity, has_been_recommended) in enumerate(self.recommendations):
            if has_been_recommended:
                continue

            distance = abs(track_energy - energy)
            if distance <= energy_margin:
                self.recommendations[i] = (track_id, track_energy, similarity, True)
                return (track_id, track_energy)

            if distance < closest_distance:
                closest_index = i
                closest_distance = distance

        if closest_index is None:
            raise ValueError("All recommendations have already been recommended")

        track_id, track_energy, similarity, _ = self.recommendations[closest_index]
        self.recommendations[closest_index] = (track_id, track_energy, similarity, True)
        return (track_id, track_energy)

    def get_recommendations(self):
        """Return the stored recommendation tuples (ValueError if none exist yet)."""
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")
        return self.recommendations

    def get_recommendations_ids(self):
        """Return the track ids of the stored recommendations, in ranking order."""
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")
        return [track_id for track_id, _, _, _ in self.recommendations]

    def get_recommendations_info(self):
        """Return the ``df_music_info`` rows of the stored recommendations, in ranking order."""
        if self.recommendations is None:
            raise ValueError("No recommendations available. Please call make_recommendations first.")
        track_ids = [track_id for track_id, _, _, _ in self.recommendations]
        df_ordered = self.df_music_info.set_index('track_id').loc[track_ids].reset_index()
        return df_ordered

## Final System

In [113]:
class MusicRecommender2Stages:
    """Final system: stage 1 computes a target energy, stage 2 picks a matching song."""

    def __init__(self, energy_calculator, hybrid_recommender, user_index, df_music_info):
        """Store the two stages, the user to recommend for and the track metadata.

        Args:
            energy_calculator: Object providing ``get_session_minute``,
                ``calculate_energy`` and ``pass_song_duration``.
            hybrid_recommender: Object providing ``make_recommendations``,
                ``recommend_song`` and the ``get_recommendations*`` accessors.
            user_index: Index of the user passed to the recommender.
            df_music_info: DataFrame with 'track_id' and 'duration_ms' columns.
        """
        self.energy_calculator = energy_calculator
        self.hybrid_recommender = hybrid_recommender
        self.user_index = user_index
        self.music_info = df_music_info

    def make_recommendations(self, n=100):
        """Prepare the candidate list of ``n`` tracks for the configured user."""
        self.hybrid_recommender.make_recommendations(self.user_index, n)

    def recommend_song(self):
        """Recommend the next song and advance the session clock by its duration.

        Returns:
            ``(minute, song)`` where ``minute`` is the session minute at which
            the song starts and ``song`` is the matching row(s) of the track
            metadata, or ``None`` when the session has ended.
        """
        current_minute = self.energy_calculator.get_session_minute()
        energy = self.energy_calculator.calculate_energy()
        if energy == -1:
            return current_minute, None # Session has ended
        song_id, _ = self.hybrid_recommender.recommend_song(energy)

        # Read from the metadata given to this instance rather than a notebook global.
        song_rows = self.music_info[self.music_info['track_id'] == song_id]

        # Whole minutes, as a plain int so the session minute stays usable as an index.
        # Tracks shorter than a minute still advance the clock by one minute; otherwise
        # the session would stay on the same reading.
        song_duration_minutes = max(1, int(song_rows['duration_ms'].values[0] // 60000))
        self.energy_calculator.pass_song_duration(song_duration_minutes)
        return current_minute, song_rows

    def pass_song_duration(self, song_duration=2):
        """Advance the session clock by ``song_duration`` minutes (delegates to stage 1)."""
        return self.energy_calculator.pass_song_duration(song_duration)

    def get_recommendations(self):
        """Return the recommender's stored recommendation tuples."""
        return self.hybrid_recommender.get_recommendations()

    def get_recommendations_ids(self):
        """Return the recommender's stored track ids."""
        return self.hybrid_recommender.get_recommendations_ids()

    def get_recommendations_info(self):
        """Return the recommender's stored tracks as a metadata DataFrame."""
        return self.hybrid_recommender.get_recommendations_info()
    

In [93]:
# Scratch check of the whole-minute duration computation used in MusicRecommender2Stages.recommend_song.
df_music_info[df_music_info['track_id'] == 'TROWQGL128E0784BAA']['duration_ms'].values[0]//60000

np.int64(3)

## Example

In [266]:
# Preview the gym member profiles available for the example.
df_gym.head()

Unnamed: 0,Age,Gender,Weight (kg),Height (m),Max_BPM,Avg_BPM,Resting_BPM,Session_Duration (hours),Calories_Burned,Workout_Type,Fat_Percentage,Water_Intake (liters),Workout_Frequency (days/week),Experience_Level,BMI
0,46,Female,74.9,1.53,179,151,66,1.3,883.0,HIIT,33.9,2.1,4,2,32.0
1,32,Female,68.1,1.66,167,122,54,1.11,677.0,Cardio,33.4,2.3,4,2,24.71
2,25,Male,53.2,1.7,190,164,56,0.59,532.0,Strength,28.8,2.1,3,1,18.41
3,38,Male,46.1,1.79,188,158,68,0.64,556.0,Strength,29.2,2.8,3,1,14.39
4,56,Female,58.0,1.68,168,156,74,1.59,1116.0,HIIT,15.5,2.7,5,3,20.55


In [267]:
# Client chosen for the example.
# NOTE(review): this value is used below both as the gym `User_ID` filter and as the
# positional `user_index` handed to the recommender — confirm the two id spaces are meant to coincide.
client_used = 0
# Heart-rate readings of that client as a plain list; EnergyCalculator indexes it by session minute.
client_heart_rates = df_heart_rates[df_heart_rates['User_ID'] == client_used]['Heart_Rate'].tolist()

In [268]:
# Stage 1 for the chosen client: profile row plus heart-rate list (a new FuzzyController is created internally).
energy_calculator = EnergyCalculator(df_gym.iloc[client_used], client_heart_rates)

In [269]:
# Load a previously saved ALS model instead of re-fitting it.
# SECURITY: pickle.load can execute arbitrary code — only load model files you created or trust.
with open('models/als_model.pkl', 'rb') as f:
    als_model = pickle.load(f)

In [270]:
# Collaborative recommender reusing the loaded model (passing als_model skips the fit in __init__).
als_recommender = ALSRecommender(interaction_matrix_user_item, track_uniques, df_music_info, als_model=als_model)

In [271]:
# Hybrid recommender built on the ALS recommender above; the content-based part is created internally from id_to_cluster.
hybrid_recommender = HybridRecommender(interaction_matrix_user_item, track_uniques, df_music_info, df_users, id_to_cluster, als_recommender=als_recommender)

In [272]:
# Assemble the two-stage system for the chosen client and pre-compute 200 candidate tracks.
music_recommender_2_stages = MusicRecommender2Stages(energy_calculator, hybrid_recommender, client_used, df_music_info)
music_recommender_2_stages.make_recommendations(n=200)
# Show the top of the candidate ranking.
music_recommender_2_stages.get_recommendations_info().head(20)

Unnamed: 0,track_id,name,artist,energy,duration_ms
0,TROWQGL128E0784BAA,Taylor,Jack Johnson,0.725,239280
1,TRXMJMD128F146AE5B,Don't Worry Be Happy,Bobby McFerrin,0.205,219080
2,TRPTIGH128C71968FA,Love Comes Tumbling,U2,0.413,285893
3,TRWAUCC128F428E6FD,Staple It Together,Jack Johnson,0.794,196146
4,TRBVOHY128F92E6A11,There Must Be A Better World Somewhere,B.B. King,0.722,268200
5,TRAEUIW12903D018F0,You And Your Heart,Jack Johnson,0.552,196240
6,TRGGLQD12903D1256C,Fortunate Fool,Jack Johnson,0.267,231186
7,TRUJOHU128F424E6A6,White Winter Hymnal,Fleet Foxes,0.497,146400
8,TRFTUIW128E0784B9F,Bubble Toes,Jack Johnson,0.532,236853
9,TRAGSGF128E0780CA7,One I Love,Coldplay,0.776,275266


In [274]:
# Ask the system for the next song. Every run of this cell consumes one song
# and advances the session clock, so re-running it steps through the session.
minute, df_recommended_song = music_recommender_2_stages.recommend_song()
print(f"Session minute: {minute}")
if df_recommended_song is not None:
    print("Recommended song:")
    display(df_recommended_song)
else:
    print("Session has already ended.")

Session minute: 3
Recommended song:


Unnamed: 0,track_id,name,artist,energy,duration_ms
6191,TRXMJMD128F146AE5B,Don't Worry Be Happy,Bobby McFerrin,0.205,219080
