In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher
from sklearn.feature_extraction.text import CountVectorizer

# Read the CSV file with explicit per-column dtypes so pandas does not have to
# infer them (identifier-like columns such as 'id' stay strings).
dtype_dict = {
    'index': int,
    'id': str,
    'name': str,
    'artists': str,
    'danceability': float,
    'duration_ms': int,
    'energy': float,
    'instrumentalness': float,
    'liveness': float,
    'popularity': int,
    'speechiness': float,
}
df = pd.read_csv("data.csv", dtype=dtype_dict)

# Clean up the 'artists' column by stripping square brackets and single quotes.
# NOTE(review): presumably the column holds a stringified list such as
# "['A', 'B']" -- verify against data.csv.
#
# regex=False is passed explicitly: the default of `regex` differs between
# pandas versions, and "[" / "]" are regex metacharacters, so spelling out the
# literal intent keeps the behaviour identical (and warning-free) everywhere.
for unwanted_char in ("[", "]", "'"):
    df["artists"] = df["artists"].str.replace(unwanted_char, "", regex=False)


def similar(a, b):
    """Return the difflib similarity ratio of two sequences.

    Args:
        a: First sequence (typically a string).
        b: Second sequence (typically a string).

    Returns:
        A float in [0, 1]; 1.0 means the sequences are identical.
    """
    matcher = SequenceMatcher(a=a, b=b)
    return matcher.ratio()


class MusicRecommender:
    """Rank tracks by how closely their name or artist string matches a query.

    The instance wraps a track table and a ``TruncatedSVD`` model.  The SVD
    projection is produced by :meth:`preprocess_data` and stored on the
    instance; the look-up methods in this class rank rows purely by fuzzy
    string similarity and do not read that projection.
    """

    def __init__(self, rec_data):
        """Store the track table and create an unfitted SVD model.

        Args:
            rec_data: DataFrame of tracks.  :meth:`preprocess_data` requires
                the columns "index", "id", "name", "artists" and
                "duration_ms" to exist (they are dropped before fitting).
        """
        self.rec_data_ = rec_data
        self.svd_model = TruncatedSVD(n_components=6)  # Adjust the number of components here

    def preprocess_data(self):
        """Fit the SVD model on the feature columns and keep the projection.

        Every column except the identifier/text columns and ``duration_ms``
        is fed to ``fit_transform``; the result is stored in
        ``self.svd_matrix``.

        Raises:
            KeyError: If any of the dropped columns is missing.
        """
        features = self.rec_data_.drop(columns=["index", "id", "name", "artists", "duration_ms"])
        self.svd_matrix = self.svd_model.fit_transform(features)

    def get_recommendations(self, column_name, value, amount=1):
        """Return the ``amount`` rows whose ``column_name`` best matches ``value``.

        Args:
            column_name: Column to search; only "name" and "artists" are
                supported.
            value: Query string to compare against each cell.
            amount: Maximum number of rows to return.

        Returns:
            A new DataFrame (independent copy) with the best matches first,
            or ``None`` when ``column_name`` is not a supported column.
        """
        if column_name not in ("name", "artists"):
            return None

        ranked_positions = self.calculate_similarity(column_name, value)
        # calculate_similarity yields *positions* (np.argsort output), so the
        # rows must be selected with .iloc; label-based .loc only happened to
        # work while the table carried a default 0..n-1 index.  Slicing the
        # positions first avoids materialising the whole reordered table.
        # .copy() hands the caller a frame it can freely add columns to.
        return self.rec_data_.iloc[ranked_positions[:amount]].copy()

    def calculate_similarity(self, column_name, value):
        """Return row positions ordered from most to least similar.

        Args:
            column_name: Column whose cells are compared with ``value``.
            value: Query string.

        Returns:
            A NumPy integer array of positional row indices (not index
            labels), best match first.
        """
        # Missing cells are not strings and would make SequenceMatcher raise;
        # score them 0.0 so they can never outrank a real match.
        similarity_scores = [
            similar(value, cell) if isinstance(cell, str) else 0.0
            for cell in self.rec_data_[column_name]
        ]
        return np.argsort(similarity_scores)[::-1]


# Wrap the loaded track table in a recommender instance.
recommender = MusicRecommender(rec_data=df)

# Fit the recommender's SVD model on the numeric feature columns.
recommender.preprocess_data()

# Interactive search over the track table.  The user picks a search basis
# (1-6), supplies a value where needed, and a short report is printed.

_TOP_N = 5                                # rows shown in every report
_NAME_MATCH_THRESHOLD = 0.7               # minimum similarity for a song-name hit
_SCORE_TOLERANCE = 0.1                    # +/- window for 0-1 audio features
_DURATION_TOLERANCE_MS = 0.5 * 60 * 1000  # +/- half a minute, in milliseconds


def _with_duration_min(tracks):
    """Return a de-duplicated copy of ``tracks`` with a ``duration_min`` column.

    Rows sharing a ``name`` collapse to their first occurrence and
    ``duration_ms`` is converted to minutes rounded to two decimals.  The
    explicit copy means the new column is never written onto a filtered
    slice of the main table (which triggers SettingWithCopyWarning).
    """
    unique_tracks = tracks.drop_duplicates(subset=['name']).copy()
    unique_tracks['duration_min'] = (unique_tracks['duration_ms'] / 1000 / 60).round(2)
    return unique_tracks


def _rows_within(column, target, tolerance):
    """Return the rows of ``df`` whose ``column`` lies in target +/- tolerance (inclusive)."""
    return df[df[column].between(target - tolerance, target + tolerance)]


def _print_top_matches(tracks):
    """Print the standard report for ``tracks`` (already passed through ``_with_duration_min``)."""
    print("\nFiltered Data:")
    if tracks.empty:
        print("No songs found based on the input.")
        return
    print("Top five matched data:")
    print(tracks[['index', 'name', 'duration_min']].head(_TOP_N))
    print("\nTop five matched artists:")
    print(tracks['artists'].head(_TOP_N))


# Ask for user input
print("Hi there! Please tell me your input from below:")
print("1. Artists 2. Danceability 3. Duration 4. Song name 5. Instrumentalness 6. Popularity")

try:
    search_basis = int(input("Enter the number corresponding to your search basis: "))

    if search_basis == 1:
        artist_name = input("Enter the artist name: ")
        filtered_data = _with_duration_min(
            recommender.get_recommendations('artists', artist_name, amount=_TOP_N)
        )
        print("\nFiltered Data:")
        if not filtered_data.empty:
            print(f"Songs of artist '{artist_name}':")
            for _, row in filtered_data.iterrows():
                # Formatted from the raw milliseconds (not the rounded column)
                # so the printed value matches a direct two-decimal format.
                print(f"{row['index']} : {row['name']} : {row['duration_ms'] / 1000 / 60:.2f} min")
            print("\nTop five matched artists:")
            print(filtered_data['artists'].head(_TOP_N))
        else:
            print("No songs found based on the input.")

    elif search_basis == 2:
        danceability = float(input("Enter the danceability score (0-1): "))
        filtered_data = _with_duration_min(
            _rows_within('danceability', danceability, _SCORE_TOLERANCE)
        )
        _print_top_matches(filtered_data)

    elif search_basis == 3:
        duration_min = float(input("Enter the duration of the song in minutes: "))
        duration_ms = int(duration_min * 60 * 1000)
        filtered_data = _with_duration_min(
            _rows_within('duration_ms', duration_ms, _DURATION_TOLERANCE_MS)
        )
        _print_top_matches(filtered_data)

    elif search_basis == 4:
        song_name = input("Enter the name of the song: ")
        query = song_name.lower()  # hoisted: identical for every row
        # Rows with a missing name are not strings; score them 0.0 instead of
        # crashing on .lower().
        name_scores = df['name'].apply(
            lambda title: similar(query, title.lower()) if isinstance(title, str) else 0.0
        )
        filtered_data = _with_duration_min(df[name_scores > _NAME_MATCH_THRESHOLD])
        _print_top_matches(filtered_data)

    elif search_basis == 5:
        instrumentalness = float(input("Enter the instrumentalness score (0-1): "))
        filtered_data = _with_duration_min(
            _rows_within('instrumentalness', instrumentalness, _SCORE_TOLERANCE)
        )
        _print_top_matches(filtered_data)

    elif search_basis == 6:
        print("\nFiltered Data:")
        print("Top five most popular songs:")
        # Order by popularity first, then de-duplicate, then take five.
        # Taking the five largest before de-duplicating could leave fewer
        # than five songs whenever a popular title appears more than once.
        most_popular_first = df.sort_values('popularity', ascending=False, kind='stable')
        filtered_data = _with_duration_min(most_popular_first).head(_TOP_N)
        print(filtered_data[['index', 'name', 'duration_min']])
        print("\nTop five most popular artists:")
        top_artists = (
            filtered_data.groupby('artists')
            .size()
            .reset_index(name='count')
            .sort_values(by='count', ascending=False)
        )
        print(top_artists.head(_TOP_N))

    else:
        print("Invalid input. Please enter a number between 1 and 6.")

except ValueError:
    # Raised by int()/float() when the typed text is not a number.
    print("Invalid input. Please enter a number between 1 and 6.")
