In [None]:
import spotipy
from spotipy.oauth2 import SpotifyOAuth
import pandas as pd
import numpy as np

import logging

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

import mlflow
import mlflow.sklearn
from itertools import combinations

import os

# Spotify API credentials.
# NOTE(security): the literal fallbacks below are real-looking secrets that
# live in source control. Prefer supplying SPOTIPY_CLIENT_ID,
# SPOTIPY_CLIENT_SECRET and SPOTIPY_REDIRECT_URI through the environment;
# the literals are kept only so existing setups keep working and should be
# rotated and removed.
CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID') or '9c98fec7710b40d4a5822cc356bf77c7'  # your Client ID
CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET') or '77f933bc3213455ea9b1672f94b34fe8'  # your Client Secret
REDIRECT_URI = os.environ.get('SPOTIPY_REDIRECT_URI') or 'http://localhost:3000/callback'  # redirect URI

def get_auth() -> "spotipy.Spotify":
    """Create a Spotify client that authenticates through ``SpotifyOAuth``.

    The OAuth manager is configured with the module-level ``CLIENT_ID``,
    ``CLIENT_SECRET`` and ``REDIRECT_URI`` and requests the scopes listed
    below.

    Returns:
        The ``spotipy.Spotify`` client wrapping the OAuth manager (not the
        ``SpotifyOAuth`` manager itself).
    """
    # Credentials are passed to SpotifyOAuth explicitly.
    auth_manager = SpotifyOAuth(
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        redirect_uri=REDIRECT_URI,
        scope=[
            "user-library-read",
            "playlist-read-private",
            "playlist-modify-public",
            "user-top-read",
        ],
    )
    return spotipy.Spotify(auth_manager=auth_manager)

def get_user_playlists(sp):
    """Return the IDs of every playlist of the current user.

    The playlist listing is paginated, so the first page alone can miss
    playlists. Each following page is fetched with ``sp.next`` until the
    response no longer carries a ``next`` link.

    Args:
        sp: Client object exposing ``current_user_playlists()`` and
            ``next(page)``.

    Returns:
        A list of playlist ID strings in the order the API returned them.
    """
    playlist_id_list = []

    page = sp.current_user_playlists()
    while page:
        # Skip empty entries defensively so a null item cannot raise.
        playlist_id_list.extend(
            playlist['id'] for playlist in page['items'] if playlist
        )
        page = sp.next(page) if page.get('next') else None

    return playlist_id_list

# Module-level Spotify client, created once when this cell/script runs and
# passed to get_recommendations() below.
sp = get_auth()


def get_recommendations_genre_similarity(select_artist, df, top):
    """Recommend rows of ``df`` whose genres resemble the selected artists'.

    The ``genres`` column is vectorised with TF-IDF. For each selected artist
    the TF-IDF vectors of that artist's rows are averaged and compared with
    every row by cosine similarity.

    Args:
        select_artist: Iterable of artist names; each is stripped and
            lower-cased before being matched against ``df['artist']``.
        df: DataFrame with at least ``artist`` and ``genres`` columns.
        top: Maximum number of rows to keep per artist.

    Returns:
        A DataFrame concatenating the per-artist results (index reset) with
        an added ``genre_similarity`` column, or an empty DataFrame when no
        selected artist is found.
    """
    # Vectorise the genres with TfidfVectorizer.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(df['genres'])

    artist_list = []
    select_artist = [artist.strip().lower() for artist in select_artist]

    for artist in select_artist:
        # Rows of the TF-IDF matrix are positional, so look the artist up by
        # position rather than by index label (labels only coincide with
        # positions for a default RangeIndex).
        artist_positions = np.flatnonzero((df['artist'] == artist).to_numpy())

        if artist_positions.size == 0:
            print(f"Artist {artist} not found in the DataFrame.")
            continue  # Skip if artist is not found

        artist_tfidf = np.asarray(tfidf_matrix[artist_positions].mean(axis=0)).flatten()
        genre_sim = cosine_similarity([artist_tfidf], tfidf_matrix).flatten()

        # The best-scoring row is dropped, as in the original implementation.
        # TODO: whether rows of the selected artists should be excluded
        # altogether (temp[~temp['artist'].isin(select_artist)]) is still
        # undecided.
        similar_indices = np.argsort(-genre_sim)[1:top + 1]
        if similar_indices.size == 0:
            continue

        # Copy so that adding a column does not write into a slice of df.
        temp = df.iloc[similar_indices].copy()
        temp['genre_similarity'] = genre_sim[similar_indices]
        artist_list.append(temp)

    # Return the combined result as a single DataFrame.
    if artist_list:
        return pd.concat(artist_list, ignore_index=True)
    return pd.DataFrame()


def get_recommendations_playlist_track_id(track_audio_features_list, df, count = 5):
    """Find, for each given track, the most similar tracks in ``df``.

    Similarity is the cosine similarity between the audio-feature vectors
    listed in ``audio_features_columns``.

    Args:
        track_audio_features_list: DataFrame of seed tracks containing the
            audio-feature columns. Its index labels are used to exclude each
            seed track from its own results, so it is presumably a subset of
            ``df`` sharing the same labels (verify against callers).
        df: DataFrame of candidate tracks with the same feature columns.
        count: Maximum number of similar tracks returned per seed track.

    Returns:
        A list with one DataFrame per seed track (copies of rows of ``df``
        plus a ``cosine_similarity_score`` column). NOTE: for empty input an
        empty DataFrame is returned instead of a list; this is kept for
        backward compatibility.
    """
    audio_features_columns = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence']

    if len(track_audio_features_list) <= 0:
        return pd.DataFrame()

    # The candidate feature matrix does not depend on the seed track, so it
    # is built once instead of on every iteration.
    track_features = df[audio_features_columns].values

    rec = []
    for label, track_row in track_audio_features_list.iterrows():
        playlist_vector = track_row[audio_features_columns].values.reshape(1, -1)

        # Cosine similarity between the seed track and every candidate.
        similarity_scores = cosine_similarity(playlist_vector, track_features).flatten()

        # Candidate positions, best score first.
        sorted_indices = np.argsort(similarity_scores)[::-1]

        # Exclude the seed track itself. ``sorted_indices`` holds positions
        # while ``label`` is an index label, so translate the label to its
        # position(s) in df before comparing.
        own_positions = df.index.get_indexer_for([label])
        similar_indices = sorted_indices[~np.isin(sorted_indices, own_positions)][:count]
        if similar_indices.size == 0:
            continue

        # Extract the recommended tracks together with their scores.
        similar_tracks = df.iloc[similar_indices].copy()
        similar_tracks['cosine_similarity_score'] = similarity_scores[similar_indices]
        rec.append(similar_tracks)

    return rec

def get_recommendations(df:pd.DataFrame, sp:"spotipy.Spotify", select_artist = None):
    """Display recommendations for the current Spotify user.

    If the user has no playlists, genre-based recommendations for
    ``select_artist`` are displayed. Otherwise, for every playlist, the
    playlist tracks that exist in ``df`` are used as seeds for
    audio-feature similarity recommendations.

    Args:
        df: Track DataFrame; must contain a ``track_id`` column plus the
            columns required by the two recommendation helpers.
        sp: Spotify client exposing ``playlist_tracks`` and ``next`` (and
            whatever ``get_user_playlists`` needs).
        select_artist: Artist names for the genre-based fallback. Defaults
            to ``['lady gaga', 'jimin']``.

    Returns:
        None. Results are printed/shown with ``print`` and ``display``.
    """
    if select_artist is None:
        # Default created per call to avoid a shared mutable default.
        select_artist = ['lady gaga', 'jimin']

    user_playlists = get_user_playlists(sp)
    print(len(user_playlists))

    # Genre based: the user has no playlists to learn from.
    if len(user_playlists) <= 0:
        result1 = get_recommendations_genre_similarity(select_artist, df, 10)
        display(result1)
        return

    # Audio-feature similarity, one result per playlist.
    result = []
    for playlist_id in user_playlists:
        track_id_list = []

        # Playlist tracks are paginated; follow every page.
        tracks = sp.playlist_tracks(playlist_id)
        while tracks:
            for track_item in tracks['items']:
                # 'track' can be missing or None; skip such entries instead
                # of raising on track['id'].
                track = track_item.get('track') if track_item else None
                if track and track.get('id'):
                    track_id_list.append(track['id'])
            tracks = sp.next(tracks) if tracks.get('next') else None

        # A single isin() keeps only the playlist tracks present in df; a
        # per-track membership scan beforehand is not needed.
        track_audio_features_list = df[df['track_id'].isin(track_id_list)]
        result.append(get_recommendations_playlist_track_id(track_audio_features_list, df, 5))

    display(result)

# Load the track dataset, normalise the text columns used for matching and
# run the recommendation flow for the authenticated user.
load_file_path = 'C:\\Users\\user\\Documents\\dev\\upstageailab-ml-pjt-ml_p5\\mlflow\\df_ml5_genre_no_delete.csv'
df = pd.read_csv(load_file_path, sep=',')


def _keep_alnum_and_space(text):
    """Drop every character that is neither alphanumeric nor whitespace."""
    return ''.join(ch for ch in text if ch.isalnum() or ch.isspace())


# Same clean-up for both columns: strip punctuation, trim, lower-case.
for _column in ('artist', 'genres'):
    _cleaned = df[_column].apply(_keep_alnum_and_space)
    df[_column] = _cleaned.str.strip().str.lower()

get_recommendations(df, sp)

1


[[      Unnamed: 0                track_id                   track_name  \
  6511        6511  17tDv8WA8IhqE8qzuQn707  My First Kiss - feat. Ke$ha   
  5010        5010  381g0b6QZxC13SzA2HRMIc                     Power Up   
  43            43  3w3y8KPTfNeOKPiqUTakBh         Locked out of Heaven   
  2487        2487  6gbLpUQtUU8ojzbE9jKt5A                  On Somebody   
  3453        3453  5vTPxzm4h2bY9rYyVrGEU5            Really Don't Care   
  
        track_popularity track_album_release_date  danceability  energy  key  \
  6511                61               2010-06-25         0.682   0.889    0   
  5010                65               2018-08-06         0.732   0.934    8   
  43                  89               2012-12-07         0.726   0.698    5   
  2487                72               2019-12-30         0.698   0.742    5   
  3453                69               2013-01-01         0.706   0.728    7   
  
        loudness  mode  ...  valence    tempo  duration_ms  \
  