In [132]:
import os
import time
import numpy as np
import requests
import spotipy
import musicbrainzngs as mb
from dotenv import load_dotenv
import pandas as pd


load_dotenv()

True

In [82]:

# Obtener los valores del archivo .env
app_name = os.getenv('APP_NAME')
app_version = os.getenv('APP_VERSION')  # Esta es la versión de tu aplicación
contact_info = os.getenv('CONTACT_INFO')

mb.set_useragent(app_name, app_version, contact_info)



### SPOTIFY ###

In [129]:
# Spotify API 
CLIENT_ID = os.getenv("CLIENT_ID")
CLIENT_SECRET = os.getenv("CLIENT_SECRET")
from spotipy.oauth2 import SpotifyClientCredentials

#Initialize SpotiPy with user credentials
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=CLIENT_ID,
                                                           client_secret=CLIENT_SECRET))

In [133]:
def get_track_details(name_string,sp=sp):
    result= sp.search(q=f'{name_string}', limit=1)
    track=result['tracks']['items'][0]

    return {
        'id': track['id'],
        'name': track['name'],
        'album': track['album']['name'],
        'popularity': track['popularity'],
        'artists': [artist['name'] for artist in track['artists']],
        'release_date': track['album']['release_date'],
    }

In [134]:
def get_track_analysis(id,sp=sp):
    """
    Fetch the audio features for a given track id.

    Parameters
    ----------
    id : str
        The Spotify track ID.
    sp : SpotifyClientCredentials
        Spotify client credentials object.

    Returns
    -------
    dict
        Audio features of the track.
    """

    result= sp.audio_features(id)[0]
    return {
        'danceability': result['danceability'],
        'energy':result['energy'],
        'key': result['key'],
        'loudness': result['loudness'],
        'mode': result['mode'],
        'speechiness':result['speechiness'],
        'acousticness': result['acousticness'],
        'instrumentalness': result['instrumentalness'],
        'liveness': result['liveness'],
        'valence': result['valence'],
        'tempo': result['tempo'],
        'duration': result['duration_ms']
    }

In [139]:
def create_track_complete_analysis(df):

    # if there's a csv file, we can use it as starting point.
    if "id" not in df.columns:
        df["id"] = np.nan

    # Lista de diccionarios de cada canción 

    for i in range(len(df)):
        if pd.isna(df.loc[i,"id"]):
            try:
                song = df.loc[i,"Title"] + " " + df.loc[i,"Artist"]

                # Info de la canción
                track_details = get_track_details(song)
                id = track_details['id']  # id de la canción que usamos para analysis
                
                # Llamamos a las funciones de genre y analysis
                track_genre = get_track_analysis(id)
                
                # Añadimos toda la info en el df
                df.loc[i, "id"] = track_details['id']
                df.loc[i,'title_spotify'] = track_details['name']
                df.loc[i,"album"] = track_details['album']
                df.loc[i,'sp_popularity'] = track_details['popularity']
                df.loc[i,"colab"] = ("Y" if len(track_details['artists']) > 1 else "N")
                df.loc[i,"release_date"] = track_details['release_date']
                df.loc[i,"danceability"] = track_genre['danceability']
                df.loc[i,'energy'] = track_genre['energy']
                df.loc[i,'loudness'] = track_genre['loudness']
                df.loc[i,'speechiness'] = track_genre['speechiness']
                df.loc[i,'acousticness'] = track_genre['acousticness']
                df.loc[i,'instrumentalness'] = track_genre['instrumentalness']
                df.loc[i,'liveness'] = track_genre['liveness']
                df.loc[i,'valence'] = track_genre['valence']
                df.loc[i,'key'] = track_genre['key']
                df.loc[i,'mode'] = track_genre['mode']
                df.loc[i,'tempo'] = track_genre['tempo']
                df.loc[i,'duration']= track_genre['duration']
            
                df.to_csv("spotify.csv", index=False)
                time.sleep(3)

            except Exception as e:
                print(f"Error obteniendo detalles para '{song}': {e}")

    # Convertir la lista de diccionarios en un DataFrame
    return df


### ACOUSTIC BRAINZ ###

In [83]:
def get_track_mbid(song_name, artist_name):
    time.sleep(2)  # Pausa para no exceder el límite de solicitudes
    try:
        # Realizar la búsqueda con campos específicos
        resultados = mb.search_recordings(recording=song_name, artist=artist_name, limit=5, strict=True)  # Aumentamos el límite para buscar más coincidencias
        
        # Filtrar resultados para obtener el que coincide exactamente con el artista
        for resultado in resultados['recording-list']:
            for artist_credit in resultado['artist-credit']:
                if (artist_credit['artist']['name'].lower() == artist_name.lower() and 
                    resultado['title'].lower() == song_name.lower()):
                    return resultado['id'] 
        
        print(f"No se encontraron resultados exactos para '{song_name}' por '{artist_name}'.")
        return None
            
    except Exception as e:
        print(f"Ocurrió un error: {e}")
        return None

In [84]:
mb.search_recordings(recording='Yellow', artist='Coldplay', limit=5, strict=True)

{'recording-list': [{'id': 'ea9cab6f-ee5e-4942-afff-47f1f1e53c47',
   'ext:score': '100',
   'title': 'Yellow',
   'disambiguation': 'live, 2005-07-13: Piazza Grande, Locarno, Switzerland',
   'artist-credit': [{'name': 'Coldplay',
     'artist': {'id': 'cc197bad-dc9c-440d-a5b5-d52ba2e14234',
      'name': 'Coldplay',
      'sort-name': 'Coldplay',
      'alias-list': [{'locale': 'ja',
        'sort-name': 'コールドプレイ',
        'type': 'Artist name',
        'alias': 'コールドプレイ'},
       {'sort-name': 'Coldplay, The',
        'type': 'Artist name',
        'end-date': '1998',
        'alias': 'The Coldplay'},
       {'sort-name': 'Cold Play', 'type': 'Search hint', 'alias': 'Cold Play'},
       {'sort-name': '콜드플레이', 'type': 'Search hint', 'alias': '콜드플레이'}]}}],
   'release-list': [{'id': 'ea634515-58a9-4495-9c3f-57a8f5e0ee3e',
     'title': '2005-07-13: Piazza Grande, Locarno, Switzerland',
     'status': 'Bootleg',
     'artist-credit': [{'name': 'Coldplay',
       'artist': {'id': 'cc197

In [85]:
get_track_mbid('bad romance', 'lady gaga')

'1cd69228-2994-4016-8bd9-bb417da3d840'

In [86]:
def get_mbid_lista(canciones):
    mbids_list = []
    for cancion in canciones:
        mbid = get_track_mbid(cancion[0],cancion[2])
        if mbid:
            mbids_list.append(mbid)  
        else:
            print(f"No se encontró el MBID para '{cancion}'.")

    return mbids_list

In [87]:
canciones=[('Bad Romance', 'pop', 'Lady Gaga'), ('Yellow', 'pop', 'Coldplay'), ('Viva la Vida', 'pop', 'Coldplay'), ('Poker Face', 'pop', 'Lady Gaga')]
mbid_list=get_mbid_lista(canciones)


In [88]:
mbid_list

['f74f3abd-47b1-4b25-8552-3d32d39c6683',
 '5bb81063-911d-4e32-8221-59690e18252a',
 '8ea17c34-77de-4c31-970a-e50e379d8013',
 '35618652-47d7-495d-806a-ee1b88eeb776']

In [89]:
def get_high_level_data(mbid):
    try:
        url = f"https://acousticbrainz.org/api/v1/{mbid}/high-level"
        response = requests.get(url)
        if response.status_code == 200:
            return response.json()
        else:
            print(f"Error: {response.status_code} - {response.text}")
            return None
    except Exception as e:
        print(f"Ocurrió un error: {e}")
        return None

In [90]:
get_high_level_data('ef534184-597f-4cad-9853-2e6b592ecf04')

{'highlevel': {'danceability': {'all': {'danceable': 0.997425496578,
    'not_danceable': 0.00257450062782},
   'probability': 0.997425496578,
   'value': 'danceable',
   'version': {'essentia': '2.1-beta4',
    'essentia_build_sha': 'b0b9016bb43cc2dafcda53132c1610db4853c6a1',
    'essentia_git_sha': 'v2.1_beta4',
    'extractor': 'music 1.0',
    'gaia': '2.4.5',
    'gaia_git_sha': 'v2.4.4-44-g95f4851',
    'models_essentia_git_sha': 'v2.1_beta1'}},
  'gender': {'all': {'female': 0.366486102343, 'male': 0.633513867855},
   'probability': 0.633513867855,
   'value': 'male',
   'version': {'essentia': '2.1-beta4',
    'essentia_build_sha': 'b0b9016bb43cc2dafcda53132c1610db4853c6a1',
    'essentia_git_sha': 'v2.1_beta4',
    'extractor': 'music 1.0',
    'gaia': '2.4.5',
    'gaia_git_sha': 'v2.4.4-44-g95f4851',
    'models_essentia_git_sha': 'v2.1_beta1'}},
  'genre_dortmund': {'all': {'alternative': 0.0294795874506,
    'blues': 0.00316091720015,
    'electronic': 0.956235349178,
    

In [91]:
def get_high_level_data_for_multiple_ids(mbid_list):
    results = []  
    
    for mbid in mbid_list:
        data = get_high_level_data(mbid) 

        if data is not None:
            results.append(data) 
        
        time.sleep(2) 
    
    return results 

In [92]:
#mbid_list=['ef534184-597f-4cad-9853-2e6b592ecf04', '67738719-51f9-4bda-a933-9862bf33f42a']

In [93]:
mbid_list

['f74f3abd-47b1-4b25-8552-3d32d39c6683',
 '5bb81063-911d-4e32-8221-59690e18252a',
 '8ea17c34-77de-4c31-970a-e50e379d8013',
 '35618652-47d7-495d-806a-ee1b88eeb776']

In [94]:
get_high_level_data_for_multiple_ids(mbid_list)

Error: 404 - {"message":"Not found"}

Error: 404 - {"message":"Not found"}

Error: 404 - {"message":"Not found"}



[{'highlevel': {'danceability': {'all': {'danceable': 0.975116968155,
     'not_danceable': 0.0248830020428},
    'probability': 0.975116968155,
    'value': 'danceable',
    'version': {'essentia': '2.1-beta4',
     'essentia_build_sha': 'b0b9016bb43cc2dafcda53132c1610db4853c6a1',
     'essentia_git_sha': 'v2.1_beta4',
     'extractor': 'music 1.0',
     'gaia': '2.4.5',
     'gaia_git_sha': 'v2.4.4-44-g95f4851',
     'models_essentia_git_sha': 'v2.1_beta1'}},
   'gender': {'all': {'female': 0.791389405727, 'male': 0.208610579371},
    'probability': 0.791389405727,
    'value': 'female',
    'version': {'essentia': '2.1-beta4',
     'essentia_build_sha': 'b0b9016bb43cc2dafcda53132c1610db4853c6a1',
     'essentia_git_sha': 'v2.1_beta4',
     'extractor': 'music 1.0',
     'gaia': '2.4.5',
     'gaia_git_sha': 'v2.4.4-44-g95f4851',
     'models_essentia_git_sha': 'v2.1_beta1'}},
   'genre_dortmund': {'all': {'alternative': 0.0515270605683,
     'blues': 0.00970970280468,
     'electron

### LAST FM ###

In [95]:

def get_top_tags_lastfm(artist, track):
    # Desde .env carga las variables
    api_key_lastfm = os.getenv('API_KEY_LASTFM')
    
    # Tags más populares de la pista
    url = f'https://ws.audioscrobbler.com/2.0/?method=track.getTopTags&api_key={api_key_lastfm}&artist={artist}&track={track}&format=json'

    response = requests.get(url)
    if response.status_code == 200:

        tags = response.json()
        if 'toptags' in tags and tags['toptags']['tag']:
            # Top son solo las primeras 4 tags
            top_tags = tags['toptags']['tag'][:4]  
            return [tag['name'] for tag in top_tags]
        else:
            return []  
    else:
        raise Exception(f'Error: {response.status_code} - {response.text}')  

In [96]:
get_top_tags_lastfm("Lady gaga", "Bad romance")

['pop', 'Lady Gaga', 'dance', 'female vocalists']

In [97]:
genres = [
    'pop', 'rock', 'hip hop', 'jazz', 'classical',
    'r&b', 'reggae', 'country', 'electronic', 'blues',
    'metal', 'folk', 'latin', 'punk', 'disco',
    'indie', 'soul', 'house', 'ska', 'alternative'
]

In [122]:
def get_top_tracks_lastfm(genre):
    
    # Load the API key from environment variables
    api_key_lastfm = os.getenv('API_KEY_LASTFM')
    if not api_key_lastfm:
        raise ValueError("The Last.fm API key is not set in the environment variables.")
    
    # Initialize an empty list for tracks
    top_tracks = []
    
    # URL to get the top tracks for the specified genre
    url = f'https://ws.audioscrobbler.com/2.0/?method=tag.getTopTracks&tag={genre}&api_key={api_key_lastfm}&format=json&page=1'
    
    response = requests.get(url)
    
    # Check if the response is successful
    if response.status_code == 200:
        tracks = response.json()
        
        if 'tracks' in tracks and tracks['tracks']['track']:
            # Get the first 50 tracks
            for track in tracks['tracks']['track'][50:100]:
                time.sleep(2)  
                top_tracks.append((track['name'], genre, track['artist']['name']))
            return top_tracks  # Return the list of top tracks
        else:
            print("No tracks found.")
            return None  # Return None if no tracks found
    else:
        print(f'Error querying the API: {response.status_code} - {response.text}')
        return None  # Exit on error



In [104]:
genres

['pop',
 'rock',
 'hip hop',
 'jazz',
 'classical',
 'r&b',
 'reggae',
 'country',
 'electronic',
 'blues',
 'metal',
 'folk',
 'latin',
 'punk',
 'disco',
 'indie',
 'soul',
 'house',
 'ska',
 'alternative']

In [99]:
def get_all_top_tracks(genres):
    all_top_tracks = []
    for genre in genres:
        tracks = get_top_tracks_lastfm(genre)
        if tracks:  # Only add if tracks were found
            all_top_tracks.extend(tracks)  # Agregar los tracks encontrados a la lista total
    return all_top_tracks

In [123]:
genres_songs_2=get_all_top_tracks(genres)

In [126]:
genres_songs_2.tocsv('genres_songs_2.csv')

[('Feel Good Inc.', 'pop', 'Gorillaz'),
 ('Yellow', 'pop', 'Coldplay'),
 ('Viva la Vida', 'pop', 'Coldplay'),
 ('Poker Face', 'pop', 'Lady Gaga'),
 ('The Scientist', 'pop', 'Coldplay'),
 ('Kids', 'pop', 'MGMT'),
 ('Somebody Told Me', 'pop', 'The Killers'),
 ('Billie Jean', 'pop', 'Michael Jackson'),
 ('Wonderwall', 'pop', 'Oasis'),
 ('Pumped Up Kicks', 'pop', 'Foster the People'),
 ('Take on Me', 'pop', 'a-ha'),
 ('Bad Romance', 'pop', 'Lady Gaga'),
 ('Heartless', 'pop', 'Kanye West'),
 ('Toxic', 'pop', 'Britney Spears'),
 ('Clocks', 'pop', 'Coldplay'),
 ('Rolling in the Deep', 'pop', 'Adele'),
 ('I Kissed a Girl', 'pop', 'Katy Perry'),
 ('Fix You', 'pop', 'Coldplay'),
 ('Somebody That I Used To Know', 'pop', 'Gotye'),
 ('Just Dance', 'pop', 'Lady Gaga'),
 ('Everybody Wants to Rule the World', 'pop', 'Tears for Fears'),
 ('Back to Black', 'pop', 'Amy Winehouse'),
 ('Clint Eastwood', 'pop', 'Gorillaz'),
 ('Paparazzi', 'pop', 'Lady Gaga'),
 ('Umbrella', 'pop', 'Rihanna'),
 ('Hey Ya!', 'p

In [127]:
genres_songs = pd.DataFrame(genres_songs, columns=['Title', 'Genre', 'Artist'])



In [149]:
spotify_analysis_songs=create_track_complete_analysis(genres_songs)

  df.loc[i, "id"] = track_details['id']
