In [1]:
# %load project_utils.py
import sys
from os import environ
from secret import sql_password, spotify_credentials
import pandas as pd
import numpy as np
import psycopg2 as pg
from psycopg2 import Error
from spotipy.oauth2 import SpotifyClientCredentials
import librosa
import spotipy
import requests
from genre_replace import genre_replace
from sklearn.metrics.pairwise import cosine_similarity

# Module-level Spotify client shared by the Spotify helper functions below.
# It is built from the client id/secret pair imported from the local
# `secret` module.
client_credentials_manager = SpotifyClientCredentials(client_id=spotify_credentials['client_id'],
                                                      client_secret=spotify_credentials['client_secret'])
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

#=============================== SQL Utils ====================================#
# Shared PostgreSQL connection to the "spotify_db" database. It is opened as
# soon as this module/cell is executed and reused by run_query.
conn = pg.connect(database="spotify_db",
                  user="postgres", 
                  password=sql_password)

def run_query(q, params=None):
    '''Run a SQL query on the shared connection and return the result
    as a pandas dataframe.

    q      -- the SQL text to run.
    params -- optional values for the placeholders in q; they are handed
              to pd.read_sql, which passes them on to the database
              driver. Prefer this over formatting values into q.

    Returns the dataframe, or None when the query fails. Errors are
    printed rather than raised, so callers must be ready for None.'''
    with conn:
        try:
            # pd.read_sql executes the query itself. The previous extra
            # cursor.execute(q) ran every query twice and left an
            # unclosed cursor behind.
            return pd.read_sql(q, conn, params=params)

        except Exception as error:
            # best-effort by design: report the problem and signal it
            # to the caller with None
            print(error)
            return None

#============================= Spotify Utils ==================================#
def search_and_extract(track_query):
    '''Search Spotify for a track and return metadata of the first hit.

    The query is converted to a string and searched as a track in the
    US market with a limit of one result.

    Returns a tuple:
    (track_id, preview_url, track_name, artist, artist_id)
    where artist / artist_id describe the first artist listed on the
    track. Raises IndexError when the search returns no items.'''
    response = sp.search(str(track_query), type='track', limit=1, market='US')

    # only the first search hit is ever used
    top_hit = response['tracks']['items'][0]

    return (
        top_hit['id'],
        top_hit['preview_url'],
        top_hit['name'],
        top_hit['artists'][0]['name'],
        top_hit['artists'][0]['id'],
    )

def get_artist_genre(artist_id):
    '''Fetch an artist from the Spotify API by artist id and return
    the value stored under the 'genres' key of the response.'''
    artist_record = sp.artist(artist_id)
    genres = artist_record['genres']
    return genres
    
def extract_features(track_id):
    '''Request the Spotify audio features of one track and return
    them as a single-row dataframe.

    The track id is converted to a string before the API call. The
    'type', 'uri', 'track_href' and 'analysis_url' entries of the
    response are removed; every other key becomes a column.'''
    # the endpoint answers with a list; the first entry is this track
    track_features = sp.audio_features(str(track_id))[0]

    # one column of values indexed by feature name, flipped into one row
    as_column = pd.DataFrame(data=track_features.values(),
                             index=track_features.keys())
    as_row = as_column.transpose()

    unwanted = ['type','uri','track_href','analysis_url']
    return as_row.drop(unwanted, axis=1)

#============================= Librosa Utils ==================================#
def check_for_track_preview(url):
    '''Given a preview url (or None), report whether the track has a
        preview.

        Returns False only when url is None, True for any other value.'''
    # identity test is the idiomatic (and __eq__-proof) way to detect None
    return url is not None

def get_mp3(url,track_id):
    '''Download the mp3 at url and write it to the local directory
        "audio-files" as track_<track_id>.mp3.

        This is best-effort: when url is None nothing is downloaded,
        and network, HTTP or file errors are printed instead of
        raised. Nothing is returned.'''
    if url is None:
        # tracks without a preview have no url, so there is nothing to fetch
        return

    try:
        # a timeout keeps a stalled download from hanging the pipeline
        doc = requests.get(url, timeout=30)
        # do not save an HTTP error page as if it were audio
        doc.raise_for_status()
        with open(f'audio-files/track_{track_id}.mp3', 'wb') as f:
            f.write(doc.content)
    except (requests.RequestException, OSError) as error:
        print(f'could not download preview for track {track_id}: {error}')

def librosa_pipeline(track_id):
    '''Extract librosa audio features for one track.

        Takes a spotify track_id, loads the corresponding mp3 preview
        from the local directory (mono, at most 30 seconds) and
        returns a dictionary mapping each feature name to the mean
        of that feature. The dictionary also carries the track_id.

        Keys, in order: track_id, spectral_centroid,
        spectral_bandwidth, rolloff, zero_crossing_rate,
        mfcc1..mfcc20, then one key per chroma label (C, C#, ..., B).

        REQUIREMENTS:
        * MP3 file must exist as audio-files/track_<track_id>.mp3
        '''

    mp3_path = f'audio-files/track_{track_id}.mp3'

    features = {'track_id': track_id}

    # decode the preview
    signal, rate = librosa.load(mp3_path, mono=True, duration=30)

    # spectral features, each reduced to a single mean value
    features['spectral_centroid'] = np.mean(
        librosa.feature.spectral_centroid(y=signal, sr=rate))
    features['spectral_bandwidth'] = np.mean(
        librosa.feature.spectral_bandwidth(y=signal, sr=rate))
    features['rolloff'] = np.mean(
        librosa.feature.spectral_rolloff(y=signal, sr=rate))
    features['zero_crossing_rate'] = np.mean(
        librosa.feature.zero_crossing_rate(signal))

    # n_mfcc=20 gives 20 rows, stored as mfcc1..mfcc20
    mfcc_rows = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=20)
    for number, coefficients in enumerate(mfcc_rows, start=1):
        features[f'mfcc{number}'] = np.mean(coefficients)

    # chroma rows are paired with these labels in order
    chroma_labels = ['C', 'C#', 'D','D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B']
    chroma_rows = librosa.feature.chroma_stft(y=signal, sr=rate)
    features.update({label: np.mean(row)
                     for label, row in zip(chroma_labels, chroma_rows)})

    return features

#============================= General Utils ==================================#
def check_query_format(query):
    '''Check that a user query has the form
    "name, artist; name, artist; ...".

    One trailing ';' is ignored. Returns True when every ';'-separated
    entry contains at least one comma (so both a name part and an
    artist part exist), otherwise False.'''
    query = query[:-1] if query.endswith(';') else query

    # The old version fell off the end and returned None for a valid
    # query, so a truthiness check on the result could never succeed.
    return all(len(track.split(",")) >= 2 for track in query.split(";"))

def sort_inputs(query):
    '''Split a "name, artist; name, artist; ..." query into tracks
    that are in the database and tracks that are not.

    Each track is looked up in norm_tracks with a case-insensitive
    substring match on both track name and artist.

    Returns [in_db, not_in_db]:
    * in_db     -- "name,artist" strings for tracks that were found
    * not_in_db -- "name artist" strings (free text for a Spotify
                   search) for tracks that were not found

    Raises IndexError when an entry has no comma; validate the query
    with check_query_format first.'''
    not_in_db = []
    in_db = []
    query = query[:-1] if query.endswith(';') else query

    for track in query.split(";"):
        track = track.split(",")

        name = track[0].strip()
        artist = track[1].strip()

        # An apostrophe would end the SQL string literal, so it is swapped
        # for '_', which ILIKE treats as "any single character". The swap
        # is done on copies so the user's real text is kept for the output
        # (previously the artist kept its '_' and names with genuine
        # underscores were rewritten to apostrophes).
        safe_name = name.replace("'","_")
        safe_artist = artist.replace("'","_")

        q = f'''SELECT * FROM norm_tracks
        WHERE track_name ILIKE '%{safe_name}%'
        AND artist ILIKE '%{safe_artist}%';
        '''
        r = run_query(q)

        # run_query returns None when the query failed; treat as not found
        if r is not None and len(r) > 0:
            # the artist stays sanitised here because this string is later
            # parsed and formatted into another SQL query
            in_db.append(name + "," + safe_artist)
        else:
            not_in_db.append(name + " " + artist)

    return [in_db, not_in_db]

def parse_and_sort_inputs(user_a_query, user_b_query):
    '''Sort both users' raw input strings with sort_inputs.

    Returns a dictionary with the keys 'user_a' and 'user_b'. Each
    value is a two-element list [in_db, not_in_db] holding that
    user's tracks that are / are not in the database.'''
    # example user inputs
    # user_a = "malibu, miley cyrus; video games, lana del rey; you're no good, linda ronstadt"
    # user_b = "don't stop me now, queen; rocket man, elton john; toxic, britney spears"

    labelled_queries = (('user_a', user_a_query),
                        ('user_b', user_b_query))

    initial_inputs = {}
    for label, raw_query in labelled_queries:
        # each user's tracks are split by whether the DB already has them
        found, missing = sort_inputs(raw_query)
        initial_inputs[label] = [found, missing]

    return initial_inputs

def cos_sim(a,b):
    '''Return the cosine similarity of two feature vectors:
        their dot product divided by the product of their
        Euclidean norms.'''
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    return np.dot(a, b) / norm_product


# # for creating a spotify playlist from track_uris
# def create_playlist(sp, recommended_tracks):
#     user_all_data = sp.current_user()
#     user_id = user_all_data["id"]

#     playlist_all_data = sp.user_playlist_create(user_id, "Friendship Playlist")
#     playlist_id = playlist_all_data["id"]
#     playlist_uri = playlist_all_data["uri"]
#     # try:
#     sp.user_playlist_add_tracks(user_id, playlist_id, recommended_tracks)
#     # except spotipy.client.SpotifyException as s:
#     # 	print("could not add tracks")

#     return playlist_uri
#================================== IN DATABASE ===============================#
def in_database(in_db):
    '''Look up each "name,artist" entry of in_db in the database.

    For every entry the first matching row of norm_tracks (joined
    with tracks for the genre) is fetched. All rows are returned in
    one dataframe with a fresh 0..n-1 index. An empty input, or one
    where every query fails, yields an empty dataframe.'''

    frames = []
    for t in in_db:
        track = t.split(",")

        # An apostrophe would end the SQL string literal; '_' matches any
        # single character in ILIKE. The artist is now sanitised as well.
        name = track[0].replace("'","_")
        artist = track[1].replace("'","_")

        q = f'''SELECT a.*, b.genre 
            FROM norm_tracks a JOIN tracks b
            ON a.track_id = b.track_id
            WHERE a.track_name ILIKE '%{name}%'
            AND a.artist ILIKE '%{artist}%'
            LIMIT 1;
            '''
        r = run_query(q)

        # run_query returns None when the query failed
        if r is not None:
            frames.append(r)

    if not frames:
        # pd.concat refuses an empty list
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concat once instead
    return pd.concat(frames, ignore_index=True)
#================================ NOT IN DATABASE =============================#
def not_in_database(not_in_db):
    '''Build feature rows for tracks that are not in the database.

    Each entry of not_in_db is used as a Spotify search query. For
    every track found, the Spotify audio features and the librosa
    features of its mp3 preview are combined into one row carrying
    track_name, artist and genre (the artist's first genre, or None
    when the artist has none).

    Tracks without a preview url are skipped with a printed message,
    because librosa has no audio to analyse for them.

    Returns a dataframe with a fresh 0..n-1 index; it is empty when
    no track could be processed.'''
    # search for each track and keep the metadata of the first hit
    metadata = {}
    for track in not_in_db:
        track_id, preview_url, track_name, artist, artist_id = search_and_extract(track)
        genres = get_artist_genre(artist_id)
        metadata[track_id] = [preview_url,track_name,artist,artist_id,genres]

    frames = []
    for track_id, (preview_url, track_name, artist, _artist_id, genres) in metadata.items():
        if preview_url is None:
            # this branch used to be a no-op, so the track went on to
            # crash in librosa when its mp3 file was missing
            print(f'no preview available for "{track_name}" by {artist}; skipping')
            continue

        spotify_features = extract_features(track_id)
        get_mp3(preview_url,track_id)

        # use librosa to extract audio features from the downloaded mp3
        r = librosa_pipeline(track_id)

        # turn the feature dict into a single-row dataframe
        librosa_features = pd.DataFrame(r,index=[0])

        # combine both feature sets so the row has the same layout as the db
        all_features = pd.concat([librosa_features,spotify_features],axis=1)
        all_features.drop(['id','duration_ms','time_signature','mode','key'],axis=1, inplace=True)

        # insert metadata: name and artist right after the first column
        all_features.insert(1,'track_name',track_name)
        all_features.insert(2,'artist',artist)
        # genre goes last; this was a hard-coded column position of 48
        first_genre = genres[0] if genres else None
        all_features.insert(len(all_features.columns),'genre',first_genre)

        frames.append(all_features)

    if not frames:
        # pd.concat refuses an empty list
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concat once instead
    return pd.concat(frames, ignore_index=True)

def scale_features(not_in_db_df):
    '''Min-max scale the feature columns of not_in_db_df together
    with the features stored in the database.

    The librosa_features and spotify_features tables are joined and
    the rows of not_in_db_df are appended underneath. Every column is
    scaled with (x - min) / (max - min). The scaled values of the
    appended rows are then written back into not_in_db_df, into all
    columns from position 3 up to (not including) the last one.

    The input dataframe is modified in place and also returned.'''
    # querying for the database
    q = '''SELECT a.*, b.*
        FROM librosa_features a 
        JOIN spotify_features b ON a.track_id = b.id;'''

    unused_columns = ['id','duration_ms','time_signature','mode','key']
    db_features = run_query(q).drop(unused_columns, axis=1)
    db_row_count = len(db_features)

    new_features = not_in_db_df.drop(['track_name','artist','genre'],axis=1)

    # stack the new rows under the db rows, leaving out the first column
    stacked = pd.concat([db_features.iloc[:,1:], new_features.iloc[:,1:]],
                        ignore_index=True)

    def _min_max(column):
        # min-max normalization of a single column
        lowest = np.min(column)
        highest = np.max(column)
        return (column - lowest) / (highest - lowest)

    scaled = stacked.apply(_min_max)

    # rows after the db rows belong to the new tracks
    not_in_db_df.iloc[:,3:-1] = scaled.iloc[db_row_count:,:].values
    return not_in_db_df

def remap_genres(df):
    '''Replace the values of the 'genre' column using the
    genre_replace mapping. The dataframe is modified in place and
    also returned.

    * string genre -- replaced by its mapped value; a genre that is
                      missing from the mapping is left unchanged
    * list genre   -- replaced by the mapped value of the last list
                      entry found in the mapping; left unchanged when
                      no entry is found
    * anything else is left untouched'''
    for i,row in df.iterrows():
        genre = row['genre']

        if isinstance(genre,str):
            # an unmapped genre used to raise KeyError here, while the
            # list branch below already tolerated missing entries
            df.loc[i,'genre'] = genre_replace.get(genre, genre)

        elif isinstance(genre,list):
            for g in genre:
                if g in genre_replace:
                    df.loc[i,'genre'] = genre_replace[g]

    return df


#============================= Combining Steps ================================#
def generate_user_df(user_lists):
    '''MUST BE CALLED ON EACH USER KEY SEPARATELY
    Takes one value of the initial_inputs dictionary, i.e. a
    sequence whose first item lists the tracks that are in the
    database and whose second item lists the tracks that are not.

    Calls in_database on the first item and not_in_database on the
    second, and returns both resulting dataframes as the tuple
    (in_db_df, not_in_db_df).

    NOTE: the two dataframes are returned separately. The scaling
    and concatenation step is currently disabled:

        if not_in_db_df.empty:
            user_df = in_db_df
        else:
            not_in_db_df = scale_features(not_in_db_df)
            user_df = pd.concat([in_db_df,not_in_db_df],ignore_index=True)
    '''
    tracks_in_db, tracks_not_in_db = user_lists[0], user_lists[1]

    found_df = in_database(tracks_in_db)
    missing_df = not_in_database(tracks_not_in_db)

    return found_df, missing_df

def get_similar_track_ids(input_track_df):
    '''
    IMPORTANT:THIS FUNCTION IS MEANT FOR ITERATION
    ----------------------------------------------
    Takes in a pandas series of a single track that contains
    track_id, track_name and genre. Queries the db for all tracks of
    the same genre, leaving out the input track's id and any track
    whose name contains the input track's name. The cosine
    similarity is then calculated between the input track and each
    returned track, using the values from position 3 up to (not
    including) the last one of every row.

    Returns a list with the track ids of the (at most) two most
    similar tracks; the list is empty when the query fails or
    matches nothing.'''

    # '_' stands for the apostrophe inside the LIKE pattern
    name = input_track_df['track_name'].replace("'","_")
    # these two are compared exactly, so an apostrophe is escaped by
    # doubling it instead of being replaced
    genre = str(input_track_df['genre']).replace("'","''")
    input_id = str(input_track_df['track_id']).replace("'","''")

    q2 = f'''
    SELECT a.*, b.genre 
    FROM tracks b
    JOIN norm_tracks a ON b.track_id = a.track_id
    WHERE b.genre = '{genre}'
    AND a.track_id != '{input_id}' 
    AND a.track_name NOT LIKE '%{name}%';'''
    genre_tracks = run_query(q2)

    # run_query returns None when the query failed
    if genre_tracks is None:
        return []

    input_vector = input_track_df.iloc[3:-1]
    all_scores = {row['track_id']: cos_sim(input_vector, row.iloc[3:-1])
                  for _, row in genre_tracks.iterrows()}

    most_similar = sorted(all_scores,
                          key=all_scores.get,
                          reverse=True)[:2]
    return most_similar

def get_feature_vector_array(id_list):
    '''
    IMPORTANT:THIS FUNCTION IS MEANT FOR ITERATION
    ----------------------------------------------
    Takes in a list of track_ids (duplicates are ignored), queries
    norm_tracks for the matching rows and returns (index, array):
    * index -- the track_ids of the rows that were found
    * array -- a 2D array with the corresponding rows, holding every
               column after the first two that follow track_id
    '''
    unique_ids = sorted({str(track_id) for track_id in id_list})

    # The IN list is written out by hand: formatting a Python tuple
    # gives "('x',)" for one id and "()" for none, both invalid SQL.
    if unique_ids:
        in_list = ", ".join("'" + track_id.replace("'", "''") + "'"
                            for track_id in unique_ids)
    else:
        # IN (NULL) is valid SQL and matches no rows
        in_list = "NULL"

    q = f'''
    SELECT * FROM norm_tracks
    WHERE track_id IN ({in_list});'''
    fv = run_query(q)

    fv = fv.set_index('track_id')
    index = fv.index
    fv = fv.iloc[:,2:]
    array = fv.values

    return index, array
#============================== Final Steps ==================================#
def create_similarity_matrix(user_a_array, user_a_index, user_b_array, user_b_index):
    '''Compute the cosine similarity between every row of
    user_a_array and every row of user_b_array.

    Returns a dataframe of the scores whose rows are labelled with
    user_a_index and whose columns are labelled with user_b_index.
    '''
    pairwise_scores = cosine_similarity(user_a_array, user_b_array)
    return pd.DataFrame(pairwise_scores,
                        index=user_a_index,
                        columns=user_b_index)

def get_combined_recommendations(cosine_df):
    '''Takes in the cosine similarity dataframe. For every row the
    best matching column is found, and the three row/column pairs
    with the highest scores are kept. The db is then queried for the
    metadata (track_id, track_name, artist, genre) of all tracks in
    those pairs, and that query result is returned as the final
    recommendations.'''

    # One (score, row id, column id) entry per row. A list is used
    # because a dict keyed by score dropped pairs with equal scores.
    best_pairs = [(row.max(), row_id, row.idxmax())
                  for row_id, row in cosine_df.iterrows()]

    top_three = sorted(best_pairs, key=lambda pair: pair[0], reverse=True)[:3]

    ids = set()
    for _score, row_id, column_id in top_three:
        ids.update((row_id, column_id))

    # The IN list is written out by hand: formatting a Python tuple
    # gives "('x',)" for one id and "()" for none, both invalid SQL.
    if ids:
        in_list = ", ".join("'" + str(track_id).replace("'", "''") + "'"
                            for track_id in sorted(ids, key=str))
    else:
        # IN (NULL) is valid SQL and matches no rows
        in_list = "NULL"

    q = f'''
    SELECT track_id, track_name, artist, genre FROM tracks
    WHERE track_id IN ({in_list});'''
    final = run_query(q)
    return final

In [3]:
# Load the whole track_metadata table into a dataframe for inspection.
q = '''SELECT * FROM track_metadata'''
data = run_query(q)

# bare expression so the notebook displays the dataframe
data

Unnamed: 0,track_id,track_name,artist,artist_id,track_album_album_type,track_album_id,track_album_name,track_duration_ms,track_popularity,track_preview_url,subgenres,genre,top_subgenre
0,6YDf6QV7QfCEo8O2dbWalK,Real Love Baby,Father John Misty,2kGBy2WHvF0VdZyqiVCkDT,single,6IpBMtAMrBvSHc9y6rTtQQ,Real Love Baby,189293.0,66.0,https://p.scdn.co/mp3-preview/a73451291399b823...,"['art pop', 'chamber pop', 'freak folk', 'indi...",indie,art pop
1,0qaWxVVd3KrP4WY27KWpIe,In a River,Rostam,04XggbrM51GcFPTxBYtRXT,single,4T3wWtIP6KqYa0KRZiuYiG,In a River,223573.0,58.0,https://p.scdn.co/mp3-preview/a47af0dc9a8a5582...,"['art pop', 'chamber pop', 'indie pop', 'indie...",indie,art pop
2,1yZMv2GMAibgLGsQiowZrt,Nancy From Now On,Father John Misty,2kGBy2WHvF0VdZyqiVCkDT,album,1BOfOlZo9Nzx7SmYAucY9t,Fear Fun,234160.0,59.0,https://p.scdn.co/mp3-preview/c209b9ccde129e96...,"['art pop', 'chamber pop', 'freak folk', 'indi...",indie,art pop
3,2eg2gvPXuwZ9FyrPaLgrXi,Chateau Lobby #4 (in C for Two Virgins),Father John Misty,2kGBy2WHvF0VdZyqiVCkDT,album,2A8IKX257C4hJaYUHMhLP7,"I Love You, Honeybear",170840.0,58.0,https://p.scdn.co/mp3-preview/1f9ea14fa26f55bb...,"['art pop', 'chamber pop', 'freak folk', 'indi...",indie,art pop
4,0wzruvvN7f5wu39aFcjTMw,Hollywood Forever Cemetery Sings,Father John Misty,2kGBy2WHvF0VdZyqiVCkDT,album,1BOfOlZo9Nzx7SmYAucY9t,Fear Fun,190613.0,56.0,https://p.scdn.co/mp3-preview/6aceffbff71010d0...,"['art pop', 'chamber pop', 'freak folk', 'indi...",indie,art pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...
33036,0a4agFmqHXxcZl1nho1BxM,Heart Of Glass - Special Mix,Blondie,4tpUmLEVLCGFr93o8hFFIB,compilation,7mEjsBlRmfP63cH1gdPT6A,Best Of Blondie,275733.0,69.0,,"['candy pop', 'dance rock', 'new romantic', 'n...",pop,candy pop
33037,1tsWNtWxi7SVydMm2G1W6Z,If I Can't Have You - 2007 Remastered Version ...,Yvonne Elliman,2d6JU9LvNhZR7AAtu4x2rS,album,0taUwU7qjtc9lvwmd7FKac,Saturday Night Fever [The Original Movie Sound...,180586.0,4.0,,['disco'],disco,disco
33038,3GGcwG519BTMdvMeFy7meT,Lady Marmalade,Patti LaBelle,0ty0xha1dbprYIUAQufkFn,compilation,2fgrmnOGJzwU79xlHe4Iub,Best Of Patti Labelle,235266.0,54.0,https://p.scdn.co/mp3-preview/da137cbab59ef54b...,"['disco', 'funk', 'motown', 'neo soul', 'new j...",disco,disco
33039,1ip1ULxxvAzaqH3VqKObdu,Can't Take My Eyes Off of You - Black Box Mix,Gloria Gaynor,6V6WCgi7waF55bJmylC4H5,album,3mn5uMVqqN1WPA2Jwd8lfk,Gloria Gaynor '90 (All New Versions),316000.0,52.0,https://p.scdn.co/mp3-preview/76fdc127a5fb13ec...,['disco'],disco,disco


In [40]:
# Rows whose (track_name, artist) pair already occurred in an earlier row.
# The trailing `.groupby` was dropped: it was never called, so the cell
# evaluated to a bound method instead of the filtered dataframe.
data[data.duplicated(subset=['track_name','artist'])]

Unnamed: 0,track_id,track_name,artist,artist_id,track_album_album_type,track_album_id,track_album_name,track_duration_ms,track_popularity,track_preview_url,subgenres,genre,top_subgenre
11,5eum0L07wuKd7Agse5Lgy1,In a River,Rostam,04XggbrM51GcFPTxBYtRXT,single,1dmgv74ZmRNaeju9S2rjeA,In a River,223573.0,54.0,https://p.scdn.co/mp3-preview/a421c5ddbe690355...,"['art pop', 'chamber pop', 'indie pop', 'indie...",indie,art pop
852,6jmZlyf9DxcIoRrjw02YXm,Old Town Road - Remix,Lil Nas X,7jVv8c5Fj3E9VhNjxT4snq,single,0ycVaTFV4Me1jr7wM9Zqip,Old Town Road (Remix),171293.0,66.0,https://p.scdn.co/mp3-preview/672c1149fba6cdd8...,"['country rap', 'lgbtq+ hip hop', 'pop rap']",rap,country rap
1271,2n3czHasOXYNf6ufeMJ2RU,Mi Mujer,Nicolas Jaar,5a0etAzO5V26gvlbmHzT9W,compilation,55swB6EDSePmlPz8KHUftw,Lazy Summer by Chris Coco,392907.0,43.0,https://p.scdn.co/mp3-preview/347eef755172b0ff...,"['downtempo', 'electronica', 'microhouse']",indie,downtempo
1906,1LmN9SSHISbtp9LoaR5ZVJ,Payphone,Maroon 5,04gDigrS5kc9YWfZHwBETP,album,5x7JzoE4A3DwwU6FPt8qws,Overexposed,231386.0,72.0,,"['pop', 'pop rock']",pop,pop
2133,2jUO4Rbw8VizhpHS63jrq4,Late Night Feelings (feat. Lykke Li),Mark Ronson,3hv9jJF3adDNsBSIQDqcjp,album,6vdLdzihSWxV8FgWU2GSvA,Late Night Feelings,251028.0,60.0,https://p.scdn.co/mp3-preview/783ba7d00f0184c9...,"['dance pop', 'pop', 'post-teen pop']",pop,dance pop
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32638,1kWja6NVaLZThMop9MYVDz,Wrong Way,Sublime,0EdvGhlC1FkGItLOWQzG4J,album,5dwkpREUiLdmPLy4POzFSa,Sublime (Explicit Version),136400.0,2.0,,"['reggae fusion', 'ska mexicano', 'ska punk']",reggae,reggae fusion
32698,48O6kz322Dzu1R6Al5147q,MakeDamnSure,Taking Back Sunday,24XtlMhEMNdi822vi0MhY1,album,5MkvE4fgTdjQMEyVYZCdCk,Louder Now (U.S. Version),208280.0,5.0,,"['emo', 'pop punk']",rock,emo
32878,2ThA2HEEfcn47HPh210woX,Don't Stop 'Til You Get Enough - Single Version,Michael Jackson,3fMbdgg4jU18AjLCKBhRSm,album,4c4zsGZwtoTXOPGlDXTmw8,Off The Wall,364560.0,8.0,,"['pop', 'r&b', 'soul']",pop,pop
32897,0JW9adkIsVTKexxkj653yx,Billie Jean,Michael Jackson,3fMbdgg4jU18AjLCKBhRSm,compilation,2cDcNwNCq8Xq8SplfcxUIg,Michael Jackson: The Ultimate Collection,292960.0,6.0,,"['pop', 'r&b', 'soul']",pop,pop


In [38]:
# All rows by the artist 'Lizzo', ordered by track name and then preview url.
data.loc[data['artist'] == 'Lizzo'].sort_values(by=['track_name','track_preview_url'])

Unnamed: 0,track_id,track_name,artist,artist_id,track_album_album_type,track_album_id,track_album_name,track_duration_ms,track_popularity,track_preview_url,subgenres,genre,top_subgenre
1888,1du4tPkvJfCIuVL6WMtqSs,Boys,Lizzo,56oDRnqbIiwx4mymNEv7dS,album,7hBV0wo7cDHZQLYnuOJ312,Cuz I Love You (Super Deluxe),172630.0,64.0,https://p.scdn.co/mp3-preview/a2db1ef85045a886...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
26207,5MczYuYdsF4IE1b8ZEZqfy,Boys - Black Caviar Remix,Lizzo,56oDRnqbIiwx4mymNEv7dS,single,1OhlGc0ktAKbD1GqlfvpcK,Boys (Remixes),234098.0,39.0,https://p.scdn.co/mp3-preview/0a53bb5042bb23c5...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
1886,6YdQgWSpsxhVeX6Xmv3IFJ,Cuz I Love You,Lizzo,56oDRnqbIiwx4mymNEv7dS,album,6dFFcYQ8VhifgdKgYY5LYL,Cuz I Love You,179978.0,72.0,https://p.scdn.co/mp3-preview/04472d81bc5a9669...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
6040,1IOST4IzZK644vqdEFkPzg,Exactly How I Feel (feat. Gucci Mane),Lizzo,56oDRnqbIiwx4mymNEv7dS,album,74gSdSHe71q7urGWMMn3qB,Cuz I Love You (Deluxe),143190.0,53.0,https://p.scdn.co/mp3-preview/eed98a5739b6ab66...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
8059,6uAm7pG66O1XesXS9bpHSF,Good As Hell,Lizzo,56oDRnqbIiwx4mymNEv7dS,single,5PMpdvn38HScK6IuLniRep,Good As Hell,159011.0,23.0,,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
1884,6KgBpzTuTRPebChN0VTyzV,Good as Hell,Lizzo,56oDRnqbIiwx4mymNEv7dS,album,7hBV0wo7cDHZQLYnuOJ312,Cuz I Love You (Super Deluxe),159735.0,70.0,https://p.scdn.co/mp3-preview/26fb41451edf62e4...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
4907,3Yh9lZcWyKrK9GjbhuS0hR,Good as Hell,Lizzo,56oDRnqbIiwx4mymNEv7dS,single,4CA8GvVF7swZafg0zYh9qq,Good as Hell,159735.0,77.0,https://p.scdn.co/mp3-preview/26fb41451edf62e4...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
1881,2rjjNUYONZqC7wSd1i6OFq,Good as Hell (feat. Ariana Grande),Lizzo,56oDRnqbIiwx4mymNEv7dS,album,7hBV0wo7cDHZQLYnuOJ312,Cuz I Love You (Super Deluxe),159011.0,67.0,https://p.scdn.co/mp3-preview/95a1c1a7a990490f...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
1078,07Oz5StQ7GRoygNLaXs2pd,Good as Hell (feat. Ariana Grande) - Remix,Lizzo,56oDRnqbIiwx4mymNEv7dS,single,1k1HuvFs562Z3CCiSYhtc1,Good as Hell (feat. Ariana Grande) [Remix],159011.0,86.0,https://p.scdn.co/mp3-preview/95a1c1a7a990490f...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room
6035,4l21VaodybkZaNRqwzSBZl,Jerome,Lizzo,56oDRnqbIiwx4mymNEv7dS,album,74gSdSHe71q7urGWMMn3qB,Cuz I Love You (Deluxe),231570.0,56.0,https://p.scdn.co/mp3-preview/8f662de51ad3f613...,"['escape room', 'minnesota hip hop', 'pop', 'p...",hip hop,escape room


In [37]:
# Rows whose track name contains "Remix"; rows without a usable name
# count as non-matches.
data.loc[data['track_name'].str.contains("Remix", na=False)]
# test = data.groupby(["track_name"]).size().sort_values(ascending=False)
# test

Unnamed: 0,track_id,track_name,artist,artist_id,track_album_album_type,track_album_id,track_album_name,track_duration_ms,track_popularity,track_preview_url,subgenres,genre,top_subgenre
13,4RXyvXsnYwPekkN5Ptsd15,No Pain - Rostam Remix,DJDS,0orEyXhG0KnPEDVgpctNJI,single,56c2Q07YTudOhUKTcl340a,Big Wave More Fire (Remixes),173613.0,45.0,,['la pop'],pop,la pop
145,4XbZuXFhpVFkAOpmxhxE7K,Run Me Through (King Princess Remix),Perfume Genius,2ueoLVCXQ948OfhVvAy3Nn,single,1JxadQqPZmNNWDdw10Gahe,Run Me Through (King Princess Remix),222127.0,52.0,https://p.scdn.co/mp3-preview/920f299a833fb93e...,"['art pop', 'chamber pop', 'chillwave', 'dance...",indie,art pop
149,4h8TzshCRWhcTQd582ghSo,Just Like Love (Jam City Remix),Perfume Genius,2ueoLVCXQ948OfhVvAy3Nn,single,38ckwzi09abDlt8RP2McK8,Reshaped,208300.0,48.0,https://p.scdn.co/mp3-preview/cce7a430f0a4ddc3...,"['art pop', 'chamber pop', 'chillwave', 'dance...",indie,art pop
198,5iR8v6fTc33LDo2t4w3342,Futile Devices (Doveman Remix),Sufjan Stevens,4MXUO7sVCaFgFjoTI5ox5c,compilation,531pUFZ1BbEhfXDxitF8TU,Call Me by Your Name (Original Motion Picture ...,135443.0,54.0,https://p.scdn.co/mp3-preview/e5360619c7542e32...,"['baroque pop', 'chamber pop', 'freak folk', '...",indie,baroque pop
342,3YDZEi6jetqyeNZdsk8Y5R,Big City Boys - Generationals Remix,BRONCHO,6Lll1MPPak4m4vZKuJB264,single,4n548r94uoKnz0kJbq5lFo,Big City Boys (Generationals Remix),194705.0,41.0,https://p.scdn.co/mp3-preview/586422a7a495a57a...,"['indie garage rock', 'neo-psychedelic', 'norm...",indie,indie garage rock
...,...,...,...,...,...,...,...,...,...,...,...,...,...
31610,34Oki6j9VX3WmV9WllUJmO,Love Me Like You - J-Vibe Reggae Remix,Little Mix,3e7awlrlDSwF3iM0WBjGMp,single,5duX2rkm2mTEeDZaMmVhi9,Love Me Like You (The Collection),184520.0,38.0,https://p.scdn.co/mp3-preview/c08d53dd1782dce0...,"['dance pop', 'girl group', 'pop', 'post-teen ...",pop,dance pop
31614,3IbpQimWfBXUuSlrCSadc7,Genius - Banx & Ranx Reggae Remix,Sia,5WUlDfRSoLAfcVSX1WnrxN,single,1Xo1BbCbbXo9RkrjFFgGyH,Genius (Banx & Ranx Remixes),194880.0,45.0,https://p.scdn.co/mp3-preview/2d265831fed7bc45...,"['australian dance', 'australian pop', 'dance ...",pop,australian dance
31616,1n0Tvb6HY6EGjp2WHFHtAW,No One - Curtis Lynch Reggae Remix,Alicia Keys,3DiDSECUqqY1AuBP8qtaIa,single,0sZmgYFY0qhrQQeOcmXO6b,No One - EP,236973.0,44.0,https://p.scdn.co/mp3-preview/66fee27d3e13a2dc...,"['hip pop', 'neo soul', 'pop', 'r&b', 'urban c...",pop,hip pop
31617,4XsVJxpSUTIIoDckhW6r5L,With Ur Love (feat. Mike Posner) - Teka & Soul...,Cher Lloyd,4m4SfDVbF5wxrwEjDKgi4k,single,5bgU9eChoaXgc5DMhRSyug,With Ur Love (feat. Mike Posner),228066.0,38.0,https://p.scdn.co/mp3-preview/c7d2095d74de3362...,"['dance pop', 'electropop', 'pop', 'post-teen ...",pop,dance pop
