In [2]:
!pip install pandas scikit-learn




In [3]:
# Import necessary libraries
import pandas as pd 

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Locations of the input CSV files (update the paths as needed)
tracks_path = 'tracks.csv'  # Update the path to your tracks.csv dataset
artists_path = 'artists.csv'  # Update the path to your artists.csv dataset
dict_artists_path = 'dict_artists.csv'  # Update the path to your dict_artists.csv

# Load the data
tracks_data = pd.read_csv(tracks_path)
artists_data = pd.read_csv(artists_path)
dict_artists_data = pd.read_csv(dict_artists_path)

# Numeric columns that are standardized below
feature_columns = ['danceability', 'energy', 'popularity', 'acousticness', 'valence']

# Preprocessing: keep only the identifying columns plus the numeric features
selected_columns = ['name', 'artists'] + feature_columns

# Drop rows with missing values. Using the non-inplace dropna() followed by an
# explicit copy() gives tracks_data its own data, so the column assignment
# below does not operate on a slice of the frame returned by read_csv (which
# is what triggers pandas' SettingWithCopyWarning).
tracks_data = tracks_data[selected_columns].dropna().copy()

# Normalize feature columns (the fitted scaler is kept at module level because
# find_similar_songs uses it to scale the requested energy level)
scaler = StandardScaler()
tracks_data[feature_columns] = scaler.fit_transform(tracks_data[feature_columns])

# Function to get similar artists from dict_artists.csv
def get_similar_artists(artist_name):
    """Return the names of the artists listed as similar to ``artist_name``.

    The name is matched case-insensitively against the ``name`` column of the
    module-level ``artists_data``; the ``id`` of the first match is then looked
    up in the ``root`` column of ``dict_artists_data``. Every non-null value in
    the remaining columns of the first matching row is treated as a
    similar-artist id and translated back to a name through ``artists_data``.

    Args:
        artist_name: Name of the artist to look up.

    Returns:
        A list of artist names, in the order their ids appear in the
        ``dict_artists_data`` row. The list is empty when the artist is not in
        ``artists_data``, when it has no row in ``dict_artists_data``, or when
        none of the listed ids can be resolved to a name.
    """
    wanted = artist_name.lower()
    matching_ids = artists_data.loc[
        artists_data['name'].str.lower() == wanted, 'id'
    ].values
    if matching_ids.size == 0:
        return []

    related_rows = dict_artists_data[dict_artists_data['root'] == matching_ids[0]]
    if related_rows.empty:
        # A known artist without a similarity row would otherwise make
        # .iloc[0, ...] raise IndexError; report "no similar artists" instead.
        return []

    # Everything after the first column of the row is a similar-artist id.
    similar_ids = related_rows.iloc[0, 1:].dropna().values.tolist()

    # Resolve all ids with a single pass over artists_data instead of one full
    # scan per id. drop_duplicates keeps the first row for each id, which is
    # the row a per-id lookup taking element [0] would have picked.
    candidates = artists_data.loc[artists_data['id'].isin(similar_ids), ['id', 'name']]
    name_by_id = candidates.drop_duplicates(subset='id').set_index('id')['name'].to_dict()

    return [name_by_id[sim_id] for sim_id in similar_ids if sim_id in name_by_id]

# Function to find songs based on similar artists
def find_songs_by_similar_artists(artist_name, data, top_n=10):
    """Return up to ``top_n`` rows of ``data`` credited to similar artists.

    The similar-artist names come from ``get_similar_artists``. A row is kept
    when any of those names occurs inside the row's ``artists`` value (an
    ``in`` containment test). Rows are returned in their original order.

    Args:
        artist_name: Artist whose related artists should be searched for.
        data: DataFrame with an ``artists`` column.
        top_n: Maximum number of rows to return.

    Returns:
        The first ``top_n`` matching rows, or an empty DataFrame (after
        printing a message) when no similar artists are known.
    """
    related_names = get_similar_artists(artist_name)
    if not related_names:
        print(f"No similar artists found for {artist_name}")
        return pd.DataFrame()

    def credits_related_artist(credited):
        # True as soon as one related name is contained in the credit value.
        for candidate in related_names:
            if candidate in credited:
                return True
        return False

    # Get songs by similar artists
    row_mask = data['artists'].apply(credits_related_artist)
    matching_rows = data[row_mask]
    return matching_rows.head(top_n)

# Function to find the most similar songs based on a given song or artist with energy filtering
def find_similar_songs(song_or_artist, data, energy_level=None, top_n=10):
    """Return the songs in ``data`` most similar to a reference song.

    The reference song is the first row whose ``artists`` value contains
    ``song_or_artist`` (case-insensitive, literal substring); if no artist
    matches, the first row whose ``name`` contains it is used instead.
    Similarity is the cosine similarity between the reference row and every
    row over the danceability, energy, popularity, acousticness and valence
    columns.

    Args:
        song_or_artist: Text to search for in the ``artists`` and ``name``
            columns.
        data: DataFrame holding ``name``, ``artists`` and the five feature
            columns. It is not modified.
        energy_level: Optional energy value. When given (including ``0``), it
            is passed through the module-level ``scaler`` and only rows whose
            ``energy`` lies within +/- 0.2 of the scaled value are kept. The
            0.2 tolerance is applied to the values stored in ``data``.
        top_n: Maximum number of rows to return.

    Returns:
        A DataFrame with the artists, name, feature and ``similarity``
        columns, sorted by descending similarity, excluding every row that
        shares the reference song's name. An empty DataFrame is returned
        (after printing a message) when nothing matches ``song_or_artist``.
    """
    feature_cols = ['danceability', 'energy', 'popularity', 'acousticness', 'valence']

    # Convert user input to lowercase for matching
    query = song_or_artist.lower()

    # Match literally (regex=False) so characters such as "(" or "+" in the
    # user's input are not interpreted as a regular expression; na=False makes
    # missing values count as "no match" instead of producing a NaN mask.
    is_artist = data['artists'].str.lower().str.contains(query, regex=False, na=False)
    is_song = data['name'].str.lower().str.contains(query, regex=False, na=False)

    if is_artist.any():
        # If an artist is found, use the first match as the reference
        reference_song = data[is_artist].iloc[0]
    elif is_song.any():
        # If a song is found, use the first match as the reference
        reference_song = data[is_song].iloc[0]
    else:
        print(f"No matching song or artist found for: {query}")
        return pd.DataFrame()

    # The row Series mixes strings and numbers, so cast the feature slice to
    # float before handing it to cosine_similarity.
    reference_vector = reference_song[feature_cols].astype(float).values.reshape(1, -1)

    # Compute cosine similarity between the reference song and all songs
    similarity_scores = cosine_similarity(data[feature_cols], reference_vector)

    # assign() returns a new frame, so the caller's DataFrame does not gain a
    # 'similarity' column as a side effect of calling this function.
    ranked = data.assign(similarity=similarity_scores.ravel())

    # "is not None" so that an explicit energy level of 0 is still honoured.
    if energy_level is not None:
        energy_scaled = scaler.transform([[0, energy_level, 0, 0, 0]])[0][1]
        ranked = ranked[ranked['energy'].between(energy_scaled - 0.2, energy_scaled + 0.2)]

    # Exclude the reference song before truncating; doing it afterwards would
    # return fewer than top_n rows whenever the reference ranks in the top.
    ranked = ranked[ranked['name'] != reference_song['name']]

    # Sort songs by similarity score (descending)
    similar_songs = ranked.sort_values(by='similarity', ascending=False).head(top_n)

    return similar_songs[['artists', 'name'] + feature_cols + ['similarity']]

# Main function to integrate everything
def main():
    """Run both recommenders for a hard-coded example query and print the results.

    First prints the songs returned by ``find_similar_songs`` (filtered by an
    example energy level), then the songs returned by
    ``find_songs_by_similar_artists``, each with a fallback message when the
    result is empty.
    """
    # Example user inputs
    query = "Ed Sheeran"  # You can replace this with any artist or song name
    target_energy = 0.8  # Example energy level input (scale from 0 to 1)

    # Find similar songs with energy level filtering
    by_features = find_similar_songs(query, tracks_data, energy_level=target_energy)
    if by_features.empty:
        print("No recommendations found.")
    else:
        print("Top similar songs:")
        print(by_features)

    # Also try finding songs based on similar artists
    print(f"Finding songs similar to artists related to {query}:")
    by_related_artists = find_songs_by_similar_artists(query, tracks_data)
    if by_related_artists.empty:
        print("No similar artist recommendations found.")
    else:
        print("Top songs by similar artists:")
        print(by_related_artists)


# Only run the demo when executed as a script, not when imported
if __name__ == '__main__':
    main()


ModuleNotFoundError: No module named 'pandas'