# Case Study

## 1. User Exerperience

In [1]:
#pip install fuzzywuzzy

In [2]:
#pip install --user pyinputplus

In [3]:
import pandas as pd
import random
from fuzzywuzzy import process
import pyinputplus as pyip



In [4]:
hotsongs = pd.read_csv("hot songs.csv")
pd.set_option('display.max_rows', None)
hotsongs

Unnamed: 0,songs,artist
0,Flowers,Miley Cyrus
1,Kill Bill,SZA
2,Last Night,Morgan Wallen
3,Unholy,Sam Smith & Kim Petras
4,Creepin',"Metro Boomin, The Weeknd & 21 Savage"
5,Cuff It,Beyonce
6,Anti-Hero,Taylor Swift
7,Die For You,The Weeknd
8,I'm Good (Blue),David Guetta & Bebe Rexha
9,As It Was,Harry Styles


In [5]:
hotsongs2 = hotsongs.copy()
duplicated_entry = {"songs":"Flowers", "artist":"ABC"}
hotsongs2 = hotsongs2.append(duplicated_entry, ignore_index=True)
hotsongs2

  hotsongs2 = hotsongs2.append(duplicated_entry, ignore_index=True)


Unnamed: 0,songs,artist
0,Flowers,Miley Cyrus
1,Kill Bill,SZA
2,Last Night,Morgan Wallen
3,Unholy,Sam Smith & Kim Petras
4,Creepin',"Metro Boomin, The Weeknd & 21 Savage"
5,Cuff It,Beyonce
6,Anti-Hero,Taylor Swift
7,Die For You,The Weeknd
8,I'm Good (Blue),David Guetta & Bebe Rexha
9,As It Was,Harry Styles


In [6]:
def transform_database(reference_database):
    return reference_database.applymap(lambda x: x.lower().title())

In [7]:
def get_user_input():
    user_input = input("Please provide a song: ").lower().title()
    return user_input

In [8]:
def check_duplicates(song_name, reference_database, title_column, artist_column):
    if (reference_database[title_column] == song_name).sum() > 1:
        duplicates = list(reference_database[reference_database[title_column] == song_name][artist_column].values)
        additional_artist_input = pyip.inputMenu(duplicates, prompt = "There seem to be duplicated entries. Which of the following artists do you meant: \n")
        return additional_artist_input
    else:
        pass

In [9]:
def check_mistakes(song_name, reference_database, title_column, artist_column):
    best_match, score = process.extractOne(song_name, reference_database[title_column].values)
    if score >= 80:
        artists = reference_database[reference_database[title_column] == best_match][artist_column].values
        answer = pyip.inputYesNo(prompt = f"Did you mean {best_match} by {artists}? Y/N: ")
        if answer.lower().title() in ["Yes", "Y"]:
            return best_match
        else:
            pass
            
    else:
        pass

In [10]:
def recommend_hot_song(reference_database, title_column, artist_column):

    song_input = get_user_input()
    
    while song_input not in list(reference_database[title_column]):
            song_input = input("Sorry there seems no matching song available. Please provide another song: ").lower().title()
            song_input = check_mistakes(song_input, reference_database, title_column, artist_column)
    
    additional_artist_input = check_duplicates(song_input, reference_database, title_column, artist_column)
    
    random_song_index = random.randint(0, len(reference_database[title_column])-1)
    random_song = reference_database[title_column][random_song_index]
    random_artist = reference_database[artist_column][random_song_index]
    return random_song, random_artist

In [11]:
recommend_hot_song(hotsongs2, "songs", "artist")

Please provide a song: flowers
There seem to be duplicated entries. Which of the following artists do you meant: 
* Miley Cyrus
* ABC
abc


('X Si Volvemos', 'Karol G x Romeo Santos')

Functionality

- Sobald Lehrzeichen oder andere Zeichen/Typos eingefügt werden sollen diese entfern werden
- Sobald ein artist eingegeben wurde, fordere den User auf, einen Song statt einem artist einzugeben
- Sofern Duplikate angezeigt werden, sollten alle angezeigt werden
- Display youtube link to the song the user inserted
- If only part of the song is displayed, also find the song
- EXTRA: display the different options of the song
- Was passiert wenn "The" davor gesetzt wird

Done:
- Rechtschreibung (transformation der eingabe in Upper + Lowercase) (DONE)
- Falls song nicht gefunden wird sollte zunächst ein error ausgeworfen werden (DONE); später soll dann eine andere Funktion geschrieben werden

In Progress:

## 2. Spotify API

### Basic Spotify API Setup

In [9]:
import config

In [10]:
import spotipy
import json
from spotipy.oauth2 import SpotifyClientCredentials

In [11]:
#connect to spotify
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

In [None]:
#Send request to API
results = sp.search(q="Lose yourself",limit=1,market="US")
#sp.audio_features(results["tracks"]["items"][0]["id"] )

### Song Feature Extraction

In [None]:
#Extracting features of songs and appending them to dataframe
list_of_songs=[]
for index in range(0,len(song["tracks"]["items"])):
    list_of_songs.append(sp.audio_features(song["tracks"]["items"][index]["uri"])[0])
df=pd.DataFrame(list_of_songs)
df=df[["danceability","energy","loudness","speechiness","acousticness",
    "instrumentalness","liveness","valence","tempo","id","duration_ms"]]

### Playlist 

In [None]:
#Extracting the songs ids from playlist
list_of_audio_features=[]
for item in range(0,10):
    #print (tracks[item]["track"]["id"])
    list_of_audio_features.append(sp.audio_features(tracks[item]["track"]["id"])[0])
df=pd.DataFrame(list_of_audio_features)    
df=df[["danceability","energy","loudness","speechiness","acousticness",
    "instrumentalness","liveness","valence","tempo","id","duration_ms"]]

In [None]:
#Searching a playlist
playlist = sp.user_playlist_tracks("spotify", "7beGd4yYY1qpsBv6K3clFZ",market="GB")

In [None]:
#Extracting songs of playlist
def get_playlist_tracks(username, playlist_id):
    results = sp.user_playlist_tracks(username,playlist_id,market="GB")
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

### Artists

In [None]:
Question: how to get the playlist ID?

In [None]:
#Extract artists from track
def get_artists_from_track(track):
    return [artist["name"] for artist in track["artists"]]

In [None]:
#Extract artists from playlist
def get_artists_from_playlist(playlist_id):
    tracks_from_playlist = get_playlist_tracks("spotify", playlist_id)
    return list(set(artist for subset in [get_artists_from_track(track["track"]) for track in tracks_from_playlist] for artist in subset))

### Albums

In [None]:
#Extract all tracks of one artist
def get_albums_from_artist(artist_id):
    results = sp.artist_albums(artist_id, limit = 50,country="GB")
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [None]:
#Extract all albums ID of one artist
def get_album_ids_from_artist(artist_id):
    results = sp.artist_albums(artist_id, limit = 50)
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return [track["id"] for track in tracks]

In [None]:
#Check 
set([artist["name"] for track in coldplay_albums for artist in track["artists"]])

In [None]:
#Extracting songs of given album
def get_track_ids_from_albums(album_ids):
    return list(set([i["id"] for j in album_ids for i in sp.album(j)["tracks"]["items"]]))

### Pipeline

In [None]:
#setup
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(client_id= config.client_id,
                                                           client_secret= config.client_secret))

In [None]:
#Identify spotify markets
markets = sp.available_markets()["markets"]

In [None]:
#Identify genres per markets & store them in dictionary
genres_per_country = {}

for market in markets:
    genres_per_country.update({market:{}})
    for category in sp.categories(country=str(market), limit=50)["categories"]["items"]:
        genres_per_country[market].update({category["name"]: category["id"]})

In [None]:
#Identify playlist_ids per market and per genre & store them in dictionary
from random import randint
from time import sleep

for market in markets:
    for category in genres_per_country[market]:
        try:
            genres_per_country[market].update({category: [genres_per_country[market][category], sp.category_playlists(category_id = genres_per_country[market][category], country = market, limit = 1)["playlists"]["items"][0]["id"]]})
        except:
            pass

In [None]:
#Format: Country: Category: Category_ID, Sample Playlist_ID
genres_per_country

In [None]:
sample = genres_per_country.copy()

In [None]:
def get_playlist_tracks(username, playlist_id):
    results = sp.user_playlist_tracks(username,playlist_id,market="GB")
    tracks = results['items']
    while results['next']:
        results = sp.next(results)
        tracks.extend(results['items'])
    return tracks

In [None]:
#Clean the playlist_ids
playlist__id_list = []
for market in markets:
    for category in sample[market]:
        playlist__id_list.append(sample[market][category][1])

In [None]:
#Check length of unclean playist_id list
len(playlist__id_list)

In [None]:
#Check length of unclean playist_id set -> There seems to be more than 7000 duplicates
len(set(playlist__id_list))

In [None]:
#Create final list of playlists
final_list = list(set(playlist__id_list))

In [None]:
#Get information of first 10 songs of each playlist
df_info = []

for playlist_id in final_list:
    for i in range(10):
        try:
            song_id = get_playlist_tracks("spotify", playlist_id)[i]["track"]["id"]
            df_info.append(sp.audio_features(song_id)[0])
        except:
            pass

In [None]:
#Append each gathered information to dataframe
df=pd.DataFrame(df_info) 
df=df[["danceability","energy","loudness","speechiness","acousticness","instrumentalness","liveness","valence","tempo","id","duration_ms"]]
df

In [None]:
df.to_csv("final.csv")

In [None]:
#Clean dataframe for multiple entries
df.duplicated().value_counts()

In [None]:
import base64
import requests

# Set up authorization headers
client_id = config.client_id
client_secret = config.client_secret
auth_header = base64.b64encode(f"{client_id}:{client_secret}".encode("ascii")).decode("ascii")
headers = {"Authorization": f"Basic {auth_header}"}

# Get access token
url = "https://accounts.spotify.com/api/token"
data = {"grant_type": "client_credentials"}
response = requests.post(url, headers=headers, data=data)
access_token = response.json()["access_token"]

# Make request to Top Tracks endpoint
artist_id = "0OdUWJ0sBjDrqHygGUXeCF"  # example artist ID
url = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks"
headers = {"Authorization": f"Bearer {access_token}"}
params = {"market": "US"}

response = requests.get(url, headers=headers, params=params)