In [2]:
import requests
import base64

In [3]:
CLIENT_ID = "CLIENT_ID"
SECRET_ID = "SECRET_ID"

In [4]:
# Base64 encode the client ID and client secret
client_credentials = f"{CLIENT_ID}:{SECRET_ID}"
client_credentials_base64 = base64.b64encode(client_credentials.encode())

In [5]:
# Request the access token
token_url = 'https://accounts.spotify.com/api/token'
headers = {
    'Authorization': f'Basic {client_credentials_base64.decode()}'
}
data = {
    'grant_type': 'client_credentials'
}
response = requests.post(token_url, data=data, headers=headers)

if response.status_code == 200:
    access_token = response.json()['access_token']
    print("Access token obtained successfully.")
else:
    print("Error obtaining access token.")
    exit()

Access token obtained successfully.


In [7]:
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyOAuth

In [20]:
def get_trend_play(playlist_id, access_token):

    #Set up Spotify with the access token 

    sp = spotipy.Spotify(auth=access_token)


    #Get the playlist tracks
    playlist_tracks = sp.playlist_tracks(playlist_id, fields="items(track(id, name, artists, album(id,name)))")

    #Extract relevant information and store in a list of dictionaries

    music_data = []

    for track_info in playlist_tracks['items']:
        track = track_info['track']
        track_name = track['name']
        artists = ', '.join([artist['name'] for artist in track["artists"]])
        album_name = track['album']['name']
        album_id = track['album']['id']
        track_id = track['id']

        #Get audio features for the track
        audio_features = sp.audio_features(track_id)[0] if track_id != 'Not available' else None

         # Get release date of the album
        try:
            album_info = sp.album(album_id) if album_id != 'Not available' else None
            release_date = album_info['release_date'] if album_info else None
        except:
            release_date = None

        #Get popularity of the track
        try:
            track_info = sp.track(track_id) if track_id != "Not available" else None
            popularity = track_info['popularity'] if track_info else None
        except:
            popularity= None

        #Add additional track information to the track data
        track_data = {
            'Track Name': track_name,
            'Artists': artists,
            'Album Name': album_name,
            'Album ID': album_id,
            'Track ID': track_id,
            'Popularity': popularity,
            'Release Date': release_date,
            'Duration (ms)': audio_features['duration_ms'] if audio_features else None,
            'Explicit': track_info.get('explicit', None),
            'External URLs': track_info.get('external_urls', {}).get('spotify', None),
            'Danceability': audio_features['danceability'] if audio_features else None,
            'Energy': audio_features['energy'] if audio_features else None,
            'Key': audio_features['key'] if audio_features else None,
            'Loudness': audio_features['loudness'] if audio_features else None,
            'Mode': audio_features['mode'] if audio_features else None,
            'Speechiness': audio_features['speechiness'] if audio_features else None,
            'Acousticness': audio_features['acousticness'] if audio_features else None,
            'Instrumentalness': audio_features['instrumentalness'] if audio_features else None,
            'Liveness': audio_features['liveness'] if audio_features else None,
            'Valence': audio_features['valence'] if audio_features else None,
            'Tempo': audio_features['tempo'] if audio_features else None,
            # Add more attributes as needed
        }

        music_data.append(track_data)
    
    #Create a pandas DF from the list of dict


    df = pd.DataFrame(music_data)

    return df







In [21]:
playlist_id = '0OArDe1MCrQLPjr1YVVls1'

# Call the function to get the music data from the playlist and store it in a DataFrame
music_df = get_trend_play(playlist_id, access_token)

# Display the DataFrame
print(music_df)

                         Track Name                   Artists Album Name  \
0                   Interpretación  Las Pastillas del Abuelo       2020   
1                   Rocanrol N´n´n´  Las Pastillas del Abuelo       2020   
2                Azúcar Impalpable  Las Pastillas del Abuelo       2020   
3            El Encanto del Flagelo  Las Pastillas del Abuelo       2020   
4                           Neblina  Las Pastillas del Abuelo       2020   
..                              ...                       ...        ...   
95           Que Carajo Es el Amor?  Las Pastillas del Abuelo     Crisis   
96  Quiero Tener Razón o Ser Feliz?  Las Pastillas del Abuelo     Crisis   
97                Que Vicios Tengo?  Las Pastillas del Abuelo     Crisis   
98     Donde Esconder Tantas Manos?  Las Pastillas del Abuelo     Crisis   
99          Cómo Pudo Entrar en Mi?  Las Pastillas del Abuelo     Crisis   

                  Album ID                Track ID  Popularity Release Date  \
0   1gYS

In [22]:
print(music_df.isnull().sum())

Track Name          0
Artists             0
Album Name          0
Album ID            0
Track ID            0
Popularity          0
Release Date        0
Duration (ms)       0
Explicit            0
External URLs       0
Danceability        0
Energy              0
Key                 0
Loudness            0
Mode                0
Speechiness         0
Acousticness        0
Instrumentalness    0
Liveness            0
Valence             0
Tempo               0
dtype: int64


In [24]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from datetime import datetime
from sklearn.metrics.pairwise import cosine_similarity

In [37]:
data = music_df

In [39]:
def calculate_weigth_pop (release_date):
    #Convert the release date to datetime object
    release_date = datetime.strptime(release_date, '%Y-%m-%d')

    #Calculate the time span between release date and today
    time_span = datetime.now() - release_date

    #Calculate the weighted popularity score base on time span
    weight = 1 / (time_span.days + 1)
    return weight

In [40]:
#Normalize the music features using minmax scaling
scaler = MinMaxScaler()

music_features = music_df[['Danceability', 'Energy', 'Key', 
                           'Loudness', 'Mode', 'Speechiness', 'Acousticness',
                           'Instrumentalness', 'Liveness', 'Valence', 'Tempo']].values

music_features_scaled = scaler.fit_transform(music_features)

In [42]:
def content_based_recom (input_song_name, num_recommendations=5):
    if input_song_name not in music_df["Track Name"].values:
        print(f"'{input_song_name}' not found in the dataset. lease enter a valid song name.")
        return
    
    #Get the index of the input song in the music DF
    input_song_index = music_df[music_df["Track Name"]==input_song_name].index[0]

    #Calculate the similarity 
    similarity_scores = cosine_similarity([music_features_scaled[input_song_index]], music_features_scaled)

    #Get the indices of the most similar songs
    similar_song_indices = similarity_scores.argsort()[0][::-1][1:num_recommendations+1]

     # Get the names of the most similar songs based on content-based filtering
    content_based_recommendations = music_df.iloc[similar_song_indices][['Track Name', 'Artists', 'Album Name', 'Release Date', 'Popularity']]

    return content_based_recommendations




In [43]:
# a function to get hybrid recommendations based on weighted popularity
def hybrid_recommendations(input_song_name, num_recommendations=5, alpha=0.5):
    if input_song_name not in music_df['Track Name'].values:
        print(f"'{input_song_name}' not found in the dataset. Please enter a valid song name.")
        return

    # Get content-based recommendations
    content_based_rec = content_based_recom(input_song_name, num_recommendations)

    # Get the popularity score of the input song
    popularity_score = music_df.loc[music_df['Track Name'] == input_song_name, 'Popularity'].values[0]

    # Calculate the weighted popularity score
    weighted_popularity_score = popularity_score * calculate_weigth_pop(music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0])

    # Combine content-based and popularity-based recommendations based on weighted popularity
    hybrid_recommendations = content_based_rec
    hybrid_recommendations = hybrid_recommendations.append({
        'Track Name': input_song_name,
        'Artists': music_df.loc[music_df['Track Name'] == input_song_name, 'Artists'].values[0],
        'Album Name': music_df.loc[music_df['Track Name'] == input_song_name, 'Album Name'].values[0],
        'Release Date': music_df.loc[music_df['Track Name'] == input_song_name, 'Release Date'].values[0],
        'Popularity': weighted_popularity_score
    }, ignore_index=True)

    # Sort the hybrid recommendations based on weighted popularity score
    hybrid_recommendations = hybrid_recommendations.sort_values(by='Popularity', ascending=False)

    # Remove the input song from the recommendations
    hybrid_recommendations = hybrid_recommendations[hybrid_recommendations['Track Name'] != input_song_name]


    return hybrid_recommendations

In [52]:
input_song_name = "Cerveza - Live In Buenos Aires / 2016"
recommendations = hybrid_recommendations(input_song_name, num_recommendations=5)
print(f"Hybrid recommended songs for '{input_song_name}':")
print(recommendations)

Hybrid recommended songs for 'Cerveza - Live In Buenos Aires / 2016':
                                          Track Name  \
4  Me Juego El Corazón? - Live In Buenos Aires / ...   
2        Qué Hago Yo Esperando un Puto As? - En Vivo   
0                                    Enano - En Vivo   
1           Inercia... - Live In Buenos Aires / 2016   
3                                  Cerveza - En Vivo   

                    Artists  \
4  Las Pastillas del Abuelo   
2  Las Pastillas del Abuelo   
0  Las Pastillas del Abuelo   
1  Las Pastillas del Abuelo   
3  Las Pastillas del Abuelo   

                                          Album Name Release Date  Popularity  
4  Vivo De Pastillas: Locura Y Realidad (Live In ...   2017-11-10        33.0  
2               10 Años en Vivo en el Luna (En Vivo)   2013-03-11        32.0  
0               10 Años en Vivo en el Luna (En Vivo)   2013-03-11        31.0  
1  Vivo De Pastillas: Locura Y Realidad (Live In ...   2017-11-10        31.0  
3     

  hybrid_recommendations = hybrid_recommendations.append({


In [50]:
music_df["Track Name"].values

array(['Interpretación', 'Rocanrol N´n´n´', 'Azúcar Impalpable',
       'El Encanto del Flagelo', 'Neblina', 'Dos Ángeles', 'El Favor',
       'Incontinencia Verbal', 'Más Lejos', 'Veinte',
       'Intro - Live In Buenos Aires / 2016',
       'Rompecabezas De Amor... - Live In Buenos Aires / 2016',
       'Saber Hacer... - Live In Buenos Aires / 2016',
       'Absolutismos… - Live In Buenos Aires / 2016',
       'Milagroso Eslabón... - Live In Buenos Aires / 2016',
       'Ella Dice... - Live In Buenos Aires / 2016',
       'Cerveza - Live In Buenos Aires / 2016',
       'Permiso Y Prometo... - Live In Buenos Aires / 2016',
       'Solo Dios (Almafuerte) - Live In Buenos Aires / 2016',
       'Artesano... - Live In Buenos Aires / 2016',
       'Inercia... - Live In Buenos Aires / 2016',
       'Gigantes... - Live In Buenos Aires / 2016',
       'Me Juego El Corazón? - Live In Buenos Aires / 2016',
       'Lo Que Tenga Que Ser... - Live In Buenos Aires / 2016',
       'Por Colectora