In [1]:
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [10]:
# Load the dataset
data = pd.read_csv('../../../data/csv/mySpotifyData.csv',index_col=0)

data = data.iloc[:10000] 

In [11]:
data.head()

Unnamed: 0,track_name,artist_name,album_name,genre,duration_ms,popularity,explicit,track_id,artist_id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,P.Y.T. (Pretty Young Thing),Karizma Duo,Acoustic Cover Album,acoustic,185266,0,False,61SsC1KU762IMSJ1kEO5cQ,4AG4GdKn7FmC3EPk8m6dxg,0.802,0.311,9,-7.537,0,0.0341,0.763,0.0,0.11,0.492,120.217
1,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334
2,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334
3,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334
4,King Jesus,Big Joe Williams,Shake Your Boogie,acoustic,103200,0,False,40BZMRed29k0DIr8QmlFtT,07NzVZ0BHZ0QOOw7nGvCgo,0.469,0.166,10,-18.608,1,0.0702,0.87,0.468,0.442,0.593,85.334


### How Interaction Features Help in Artist Recommendations

Enhanced Personalization: Interaction features can capture nuanced relationships between different aspects of music and listener preferences, leading to more personalized recommendations. Improved Predictive Power: By including interactions, the model may better understand the complex dynamics that influence music preferences, potentially improving the accuracy of recommendations. Richer Feature Set: Interaction features enrich the feature set, providing the model with more information to distinguish between different artists and user preferences.

#### Here in our given dataset, am going to use the combination of  "genre and popularity" and "Energy and tempo". Let's us experiement with each and their results

In [12]:
# Check the unique genres in the dataset
unique_genres = data['genre'].unique()

# Initialize LabelEncoder
label_encoder = LabelEncoder()

# Fit LabelEncoder on unique genres and transform genre column
data['genre_encod'] = label_encoder.fit_transform(data['genre'])

In [13]:
# Assuming 'popularity' is a feature and 'genre_encoded' is a one-hot encoded feature of genres
data['popularity_genre_interaction'] = data['popularity'] * data['genre_encod']

# You can create more complex interactions as needed
data['energy_tempo_interaction'] = data['energy'] * data['tempo']

In [21]:
artist_features_pg = data.groupby('artist_name').mean(numeric_only=True)[['popularity_genre_interaction']]
artist_features_et = data.groupby('artist_name').mean(numeric_only=True)[['energy_tempo_interaction']]
artist_features = data.groupby('artist_name').mean(numeric_only=True)[['popularity_genre_interaction','energy_tempo_interaction']]

In [24]:
artist_features_pg.head()

Unnamed: 0_level_0,popularity_genre_interaction
artist_name,Unnamed: 1_level_1
1da Banton,15.0
2Baba,4.727273
33Miles,27.5
9ice,18.185185
A Fine Frenzy,0.0


In [25]:
artist_features_et.head()

Unnamed: 0_level_0,energy_tempo_interaction
artist_name,Unnamed: 1_level_1
1da Banton,60.677561
2Baba,113.371628
33Miles,69.927475
9ice,74.504932
A Fine Frenzy,40.427968


In [26]:
artist_features.head()

Unnamed: 0_level_0,popularity_genre_interaction,energy_tempo_interaction
artist_name,Unnamed: 1_level_1,Unnamed: 2_level_1
1da Banton,15.0,60.677561
2Baba,4.727273,113.371628
33Miles,27.5,69.927475
9ice,18.185185,74.504932
A Fine Frenzy,0.0,40.427968


## Similarity

In [27]:
artist_features_matrix_et = cosine_similarity(artist_features_et)
print(artist_features_matrix_et)

[[1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 ...
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]
 [1. 1. 1. ... 1. 1. 1.]]


In [28]:
artist_features_matrix_pg = cosine_similarity(artist_features_pg)
print(artist_features_matrix_pg)

[[1. 1. 1. ... 0. 0. 1.]
 [1. 1. 1. ... 0. 0. 1.]
 [1. 1. 1. ... 0. 0. 1.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 1. 1. ... 0. 0. 1.]]


In [29]:
artist_features_matrix = cosine_similarity(artist_features)
print(artist_features_matrix)

[[1.         0.97993195 0.99125623 ... 0.97077681 0.97077681 0.9573267 ]
 [0.97993195 1.         0.94506153 ... 0.99913181 0.99913181 0.8805064 ]
 [0.99125623 0.94506153 1.         ... 0.93062239 0.93062239 0.98709097]
 ...
 [0.97077681 0.99913181 0.93062239 ... 1.         1.         0.85999324]
 [0.97077681 0.99913181 0.93062239 ... 1.         1.         0.85999324]
 [0.9573267  0.8805064  0.98709097 ... 0.85999324 0.85999324 1.        ]]


## Get Top 5 Artists Recommendations

In [31]:
def get_artist_recommendations(artist_name, similarity_matrix, artist_features, top_n=5):
    # Find the index of the artist in the features DataFrame
    artist_idx = artist_features.index.get_loc(artist_name)
    
    # Get the similarity scores for the artist and sort them
    similarity_scores = list(enumerate(similarity_matrix[artist_idx]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    
    # Get the indices of the top-n most similar artists
    most_similar_indices = [i[0] for i in similarity_scores[1:top_n+1]]  # Skip the first one (self-similarity)
    
    # Get the artist names corresponding to the indices
    most_similar_artists = artist_features.iloc[most_similar_indices].index.tolist()
    
    return most_similar_artists

In [44]:
# Method 1
artist_of_interest = 'Vendredi'
similar_artists = get_artist_recommendations(artist_of_interest, artist_features_matrix_et, artist_features_et, top_n=5)
print(f"Artists similar to {artist_of_interest}: {similar_artists}")

Artists similar to Vendredi: ['2Baba', '33Miles', '9ice', 'A Fine Frenzy', 'A Mose']


In [45]:
# Method 2
artist_of_interest = 'Vendredi'
similar_artists = get_artist_recommendations(artist_of_interest, artist_features_matrix_pg, artist_features_pg, top_n=5)
print(f"Artists similar to {artist_of_interest}: {similar_artists}")

Artists similar to Vendredi: ['2Baba', '33Miles', '9ice', 'A Fine Frenzy', 'A Mose']


In [48]:
# Method 3
artist_of_interest = 'Alvin Youngblood Hart'
similar_artists = get_artist_recommendations(artist_of_interest, artist_features_matrix, artist_features, top_n=5)
print(f"Artists similar to {artist_of_interest}: {similar_artists}")

Artists similar to Alvin Youngblood Hart: ['AJJ', 'ATR', 'Adam Werner', 'Agustín Amigó', 'Albert Collins']


In [43]:
data['artist_name'].unique()

array(['Karizma Duo', 'Big Joe Williams', 'Vendredi', 'Leroy Carr',
       'Daniel Robinson', 'Revisions', 'Disney Peaceful Guitar',
       'Son House', 'Eric Lumiere', 'Memphis Minnie', 'Muriel Anderson',
       'Shannon & Keast', 'NORMANDY', 'Lead Belly', 'Keith McInally',
       'Jason Mraz', 'Grace George', 'Big Bill Broonzy', 'Graham Colton',
       'Kazuyoshi Saito', 'Mike Howe', 'Tico Moon',
       'Mississippi Fred McDowell', 'Thom Cooper', 'Junior Kimbrough',
       'Steve Petrunak', 'Jenny & Tyler', 'Roostz', 'Nylonwings',
       'David Mead', 'Gab De La Vega', 'David Berkeley', 'Noah Gundersen',
       'Skip James', 'Mat Kearney', 'Covers Culture', 'Big Mama Thornton',
       'YUZU', 'Sara Jackson-Holman', 'Blind Boy Fuller',
       'Charley Patton', 'John Standefer', 'Memphis Jug Band',
       'Robert Pete Williams', 'Jota John', 'Jürgen Saalmann',
       'Aleko Nunez', 'Adam Werner', 'Rachael Yamagata', 'Maiden United',
       'Tim Barry', 'Frank Turner', 'Iron & Wine', 'D

## Positively Interacted Artists

In [49]:
# Define a threshold for the number of tracks listened to
threshold = 20  # For example, consider an artist positively interacted with if the user has listened to more than 5 tracks by that artist

# Identify positively interacted artists
positively_interacted_artists = data['artist_name'].value_counts()
positively_interacted_artists = positively_interacted_artists[positively_interacted_artists > threshold].index.tolist()

In [50]:
positively_interacted_artists[0:10]

['Levon Helm',
 'Dr. John',
 'Soda Stereo',
 'TV On The Radio',
 'Skank',
 'Rev. Gary Davis',
 'Mudhoney',
 'Talking Heads',
 'Sepultura',
 'Melvins']