#### Libraries

In [10]:
import pandas as pd
import numpy as np

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

from IPython.display import Markdown, display

from youtubesearchpython import VideosSearch

from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn import cluster, datasets
from sklearn.cluster import KMeans

import random

import pickle

#### Identification and Authentication

In [2]:
secrets_file = open("secrets.txt","r")
string = secrets_file.read()
string.split('\n')

# Dictionary
secrets_dict={}
for line in string.split('\n'):
    if len(line) > 0: 
        secrets_dict[line.split(':')[0]]=line.split(':')[1].strip()

#### Initializing spotipy

In [3]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id=secrets_dict['cid'],
                                                           client_secret=secrets_dict['csecret']))


#### Importing required files

In [4]:
topsongs = pd.read_csv('topsongs_data.csv', index_col =False)
spotify_data = pd.read_csv('spotify_data.csv', index_col = False )

#### Importing scaler and kmeans models

In [5]:
scaler = pickle.load(open('scaler_model.pkl','rb'))

In [6]:
kmeans = pickle.load(open('kmeans_model.pkl','rb'))

#### Importing song recommender function

Couldn't import my function

In [10]:
def get_song_from_spotify(song_searched, spotify_data):
    
    """
    The function searches for a song in the Spotify database by taking in a song name 
    and a dataframe spotify_data. If the song is not in spotify_data, it searches for
    the song in the Spotify API and extracts information such as URI and features
    of the selected song. This information is converted to a DataFrame and appended to
    spotify_data. The updated dataframe is returned, and if the song is not found,
    it returns None.
    """
    
    # If song is NOT in spotify_data:
    if song_searched not in spotify_data['title'].values:
        
        # Search song usiing spotipy
        track_id = sp.search(q='track:' + song_searched, type='track')  # song info

        # If there are multiple versions of the song played by different artists:
        if len(track_id['tracks']['items']) > 1:
            
            # Print list of artists who played the song
            print("\n")
            print("Multiple versions of the song '{}' were found:".format(song_searched))
            print("\n")
            for i, item in enumerate(track_id['tracks']['items']):
                print("{}: {}".format(i+1, item['album']['artists'][0]['name']))
                
            # Ask user to select the desired version of the song
            while True:
                try:
                    # Ask the user to enter a number to pick an artist
                    print("\n")
                    selection = int(input("Please enter the NUMBER of the version you are looking for: "))
                    # Checking that selected number is not outside the range of available options
                    if selection < 1 or selection > len(track_id['tracks']['items']):
                        # If it's outisde the range, print error message
                        print("Invalid selection. Please enter a NUMBER between 1 and {}.".format(len(track_id['tracks']['items'])))
                    # If number selected it's in range
                    else:
                        break
                # Except block which is executed if an exception is raised in the 'try' block.
                except ValueError:
                    print("Invalid selection. Please enter a NUMBER between 1 and {}.".format(len(track_id['tracks']['items'])))
            
            # Use the selected version of the song
            item = track_id['tracks']['items'][selection-1]
        
        # If there is NOT multiple versions of the song played by different artists:
        else:
            # Use the only version of the song found
            item = track_id['tracks']['items'][0]
        
        # Retrieving data of the song selected
        song_uri = item['uri']  # uri
        features = sp.audio_features(song_uri)[0]

        # New dictionary retrieving values for 'title', 'artist' and 'uri'
        track = {
            'title': item['name'],
            'artist': item['album']['artists'][0]['name'],
            'uri': song_uri,
        }

        # For each feature (song uri) add in 'track' dictionary new pairs of key-value to it
        for feature_key, feature_value in features.items():
            track[feature_key] = feature_value

        # Append new songs to spotify_data
        new_song_df = pd.DataFrame([track])
        spotify_data = pd.concat([spotify_data, new_song_df], axis=0)#, ignore_index=True)
        spotify_data = spotify_data.drop_duplicates()
        #spotify_data = spotify_data.drop(['Unnamed: 0'], axis=1)
        spotify_data = spotify_data.reset_index(drop=True)
        #spotify_data.to_csv('spotify_data.csv')
        
        # Return the results as a DataFrame
        return spotify_data
    
    else:
        
        print("{} was already in spotify_data database".format(song_searched))
        return spotify_data

    
        # track_name = track_id['tracks']['items'][0]['name'] # title
       # track_id['tracks']['items'][0]['album']['artists'][0]['name'] # artist band
       # song_uri = track_id['tracks']['items'][0]['uri'] # uri
       # features = sp.audio_features(song_uri)[0]
    

In [11]:
def recommend_song(song_searched, spotify_data, topsongs):
    """
    This function takes in a song name and a dataframe spotify_data.
    It first checks if the song is in topsongs, and if so, recommends a song from topsongs.
    If the song is not in spotify_data, the function uses Spotipy to search for the song 
    and gather information about it.
    If the song is in spotify_data or its information has already been collected,
    the function creates clusters to recommend a song from the same cluster
    as the selected song.
    """
    song_searched = song_searched.lower().strip()

    # Check if the song is in top songs:
    if song_searched in topsongs['title'].str.lower().str.strip().values:
        recommended_song = topsongs.loc[topsongs['title'].str.lower().str.strip() != song_searched].sample(n=1)
        display(Markdown(f"Based on your selection of '**{song_searched.capitalize()}**', we recommend the *hot* song '**{recommended_song['title'].iloc[0]}**' by **{recommended_song['artist'].iloc[0]}**."))
        return recommended_song, spotify_data
    
    # Nested get_song_from_spotify function:
    spotify_data = get_song_from_spotify(song_searched, spotify_data)

    X_features = spotify_data.select_dtypes(np.number)

    # Scaling Data
    scaler = StandardScaler()
    X_prep = scaler.fit_transform(X_features)

    # KMeans : 8 clusters
    kmeans = KMeans(n_clusters=8, random_state=1234)
    kmeans.fit(X_prep)

    # Finding the index of the selected song:
    spotify_data['title'] = spotify_data['title'].str.lower().str.strip()
    song_index = spotify_data.index[spotify_data['title'] == song_searched]
    
    if len(song_index) == 0:
        print(f"No matching song found for {song_searched}")
        return None, spotify_data
    
    song_index = song_index[0]

    # Predicting cluster for the selected song:
    song_cluster = kmeans.predict(X_prep[song_index].reshape(1, -1))[0]

    # Selecting a random song from the same cluster:
    cluster_songs = spotify_data[kmeans.labels_ == song_cluster]
    recommended_song = cluster_songs.sample(n=1)
    
    # Finding most similar song based on cosine similarity:
    X_searched = X_prep[song_index].reshape(1, -1)
    similarities = cosine_similarity(X_searched, X_prep)
    sim_indices = similarities.argsort()[0][::-1][1:]
    sim_cluster_songs = spotify_data.iloc[sim_indices][kmeans.labels_[sim_indices] == song_cluster]
    similar_song = sim_cluster_songs.iloc[0]

    # Get YouTube links for both recommended songs
    search_results = VideosSearch(f"{similar_song['title']} {similar_song['artist']}", limit=1).result()["result"]
    similar_song_url = search_results[0]["link"]
    search_results = VideosSearch(f"{recommended_song['title'].iloc[0]} {recommended_song['artist'].iloc[0]}", limit=1).result()["result"]
    recommended_song_url = search_results[0]["link"]
    
    print("\n")
    # Print both recommendations and their URLs
    display(Markdown(f"The most similar song to '**{song_searched.capitalize()}**' is:\n\n**'{similar_song['title'].upper()}'** by **'{similar_song['artist'].upper()}'**."))
    display(Markdown(f"URL: {similar_song_url}"))
    #display(Markdown(f"[**URL**]({similar_song_url})\n"))
    print("\n")
    display(Markdown(f"If you're a fan of '**{song_searched.capitalize()}**', you might like:\n\n**'{recommended_song['title'].iloc[0].upper()}'** by **'{recommended_song['artist'].iloc[0].upper()}'**.\n\nIt may not be the most similar, but who knows? You might discover your new favorite song!"))
    display(Markdown(f"URL: {recommended_song_url}"))
    #display(Markdown(f"[**URL**]({recommended_song_url})\n"))

## Song Recommender

In [16]:
song_searched = input()
#recommended_song, spotify_data = recommend_song(song_searched, spotify_data, topsongs)
recommend_song(song_searched, spotify_data, topsongs)
spotify_data.to_csv('spotify_data.csv', index=False)

donde habita el olvido


Multiple versions of the song 'donde habita el olvido' were found:


1: Joaquín Sabina
2: Joaquín Sabina
3: Bunbury
4: Joaquín Sabina
5: Joaquín Sabina
6: Joaquín Sabina
7: La Fuga
8: Jaime Candie
9: Malas Compañías
10: Candela Vargas


Please enter the NUMBER of the version you are looking for: 2




The most similar song to '**Donde habita el olvido**' is:

**'IT'S OVER - 7" REMIX'** by **'LEVEL 42'**.

URL: https://www.youtube.com/watch?v=P123rhOyGc8





If you're a fan of '**Donde habita el olvido**', you might like:

**'FEELIN' STRONGER EVERY DAY'** by **'CHICAGO'**.

It may not be the most similar, but who knows? You might discover your new favorite song!

URL: https://www.youtube.com/watch?v=DljvkWj_siY

In [27]:
from get_song_from_spotify_module import get_song_from_spotify

with open('get_song_from_spotify.pkl', 'rb') as f:
    get_song_from_spotify = pickle.load(f)

In [29]:
from recommend_song_module import recommend_song

with open('recommend_song.pkl', 'rb') as f:
    recommend_song = pickle.load(f)

FileNotFoundError: [Errno 2] No such file or directory: 'recommend_song.pkl'

In [26]:
import os

if os.path.exists('/IH-Labs/6-week/GNOD/recommend_song.pkl'):
    print("File exists")
else:
    print("File not found")

File not found
