### Collect data from Spotify API using spotipy library

In [1]:
import datetime as dt
from dotenv import load_dotenv
import json
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import FuncFormatter
import numpy as np
import os
import pandas as pd
from pandas.io.json import json_normalize
from pathlib import Path
import re
import requests
import spotipy
import spotipy.util as util

%matplotlib inline

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()
# Spotify application credentials and account name; os.getenv yields None
# for any variable that is not set.
CLIENT_ID = os.getenv('CLIENT_ID')
CLIENT_SECRET = os.getenv('CLIENT_SECRET')
REDIRECT_URI = os.getenv('REDIRECT_URI')
# NOTE(review): USERNAME is commonly pre-set by the operating system (e.g. the
# Windows login name) and may take precedence over the .env value - confirm.
USERNAME = os.getenv('USERNAME')
# OAuth scope requested when authorizing the client.
SCOPE = 'playlist-read-private'

In [4]:
# Build a client whose authorization is handled by a SpotifyOAuth manager.
spotify=spotipy.Spotify(auth_manager=spotipy.SpotifyOAuth(CLIENT_ID,CLIENT_SECRET,REDIRECT_URI,\
                                                          scope=SCOPE,username=USERNAME))
# Fetch the current user's profile and show which fields it contains.
me = spotify.me()
me.keys()

dict_keys(['display_name', 'external_urls', 'followers', 'href', 'id', 'images', 'type', 'uri'])

In [5]:
def spotipy_userauth2(username):
    '''
    Build a spotipy client from a token requested with util.prompt_for_user_token.

    The token obtained this way is not refreshed, and the util helper may be
    deprecated in the future; see 'spotipy_userauth' for the refreshing variant.

    username : username (string)

    Returns : spotipy.Spotify object created with the token.
    '''
    token_request = dict(
        username=username,
        scope=SCOPE,
        client_id=CLIENT_ID,
        client_secret=CLIENT_SECRET,
        redirect_uri=REDIRECT_URI,
    )
    return spotipy.Spotify(auth=util.prompt_for_user_token(**token_request))

def spotipy_userauth(username):
    '''
    Build a spotipy client backed by a SpotifyOAuth auth manager, so the
    token is refreshed automatically.

    username : username (string)

    Returns : spotipy.Spotify object.
    '''
    oauth_manager = spotipy.SpotifyOAuth(CLIENT_ID, CLIENT_SECRET, REDIRECT_URI,
                                         scope=SCOPE, username=username)
    return spotipy.Spotify(auth_manager=oauth_manager)

In [4]:
def extract_playlists(spotipyUserAuth, username):
    
    '''
    Extract user's playlists' details, following paging links so that
    every playlist is returned and not only the first page.
    
    spotipyUserAuth : spotipy object from 'spotipy_userauth' function.
    username : username(string)
    
    Returns a list of dictionary containing details of individual playlists.
    '''
    page = spotipyUserAuth.user_playlists(username)
    playlistsdetails = []

    while page:
        playlistsdetails.extend(page['items'])
        # A page without a 'next' link (or without the key at all) is the last one.
        page = spotipyUserAuth.next(page) if page.get('next') else None

    return playlistsdetails
    

In [5]:
def playlists_id_url(playlistsdetails):
    
    '''
    Split playlist detail dictionaries into four parallel lists.
    
    playlistsdetails : list of dictionary containing details of individual
                       playlist. Obtained from spotipy.user_playlists.
                       
    Returns :  tuple of lists - playlist names, ids, track-listing urls
               and total number of tracks, in that order.
    '''
    base_url = 'https://api.spotify.com/v1/playlists/'

    names = [entry['name'] for entry in playlistsdetails]
    ids = [entry['id'] for entry in playlistsdetails]
    # Track listing endpoint of each playlist
    urls = [base_url + entry['id'] + '/tracks' for entry in playlistsdetails]
    totals = [entry['tracks']['total'] for entry in playlistsdetails]

    return names, ids, urls, totals

In [5]:
def playlists_id_url(playlistsdetails):
    
    '''
    Collect names, ids, track-listing urls and track counts of playlists.

    TODO: include a means to extract output for multiple public playlists
    given by id/url or name.

    playlistsdetails : list of dictionary containing details of individual
                       playlist. Obtained from spotipy.user_playlists.
                       
    Returns :  playlists' names, ids, urls and total tracks (four lists).
    '''
    playlistsURL = 'https://api.spotify.com/v1/playlists/'
    collected_names, collected_ids, collected_urls, collected_totals = [], [], [], []

    for details in playlistsdetails:
        playlist_id = details['id']
        collected_names.append(details['name'])
        collected_ids.append(playlist_id)
        collected_urls.append(playlistsURL + playlist_id + '/tracks')
        collected_totals.append(details['tracks']['total'])

    return collected_names, collected_ids, collected_urls, collected_totals

In [6]:
def get_pl_details(username):
    '''
    Fetch the playlists of a user and split their details into lists.

    Uses the module-level spotipy client 'sp'.

    username : username (string)

    Returns : playlist names, ids, urls and total tracks (four lists).
    '''
    names, ids, urls, totals = playlists_id_url(extract_playlists(sp, username))

    return names, ids, urls, totals

In [6]:
def get_pl_details(username):
    '''
    Return names, ids, urls and track counts of a user's playlists.

    The module-level spotipy client 'sp' is used for the request.

    TODO: accept username or public playlist list of id/url or name
    '''
    user_playlists = extract_playlists(sp, username)
    name_list, id_list, url_list, track_counts = playlists_id_url(user_playlists)

    return name_list, id_list, url_list, track_counts

In [7]:
def filtersort_playlists(pl_name, pl_id, pl_url, pltot_tracks, key_words = None, start = 0, pl_range = 10):
    
    '''
    Filters playlists either by keywords present in the playlist name or by
    position in the list, then sorts them descending by total number of tracks.
    
    pl_name, pl_id, pl_url, pltot_tracks : parallel lists returned from the
                'playlists_id_url' function.
    key_words : list of words (genre/terms). Default None. A playlist is kept
                when any word is a (case-sensitive) substring of its name.
                When given, 'start' and 'pl_range' are ignored.
    start : index of the first playlist to keep when key_words is None.
    pl_range : index one past the last playlist to keep when key_words is
               None. Default 10. Values beyond the number of playlists are
               clamped instead of raising IndexError.
    
    Returns: list of tuples (# of tracks, playlist name, id, url), largest
             playlist first.
    '''
    
    if key_words is not None:
        selected = [i for i, name in enumerate(pl_name)
                    if any(word in name for word in key_words)]
    else:
        # Clamp the upper bound so short playlist lists do not overrun.
        selected = range(start, min(pl_range, len(pl_name)))

    rows = [(pltot_tracks[i], pl_name[i], pl_id[i], pl_url[i]) for i in selected]

    # Track count is the first tuple element, so it drives the ordering.
    return sorted(rows, reverse = True)
    

In [8]:
def extract_tracks(spotipyUserAuth, playlist_id, allCol = False, showkeys = False):
    
    '''
    Extract track info of all tracks in a playlist. 
    
    spotipyUserAuth : spotipy object from 'spotipy_userauth' function.
    playlist_id : playlist id can be obtained from  'extract_playlists'
                  or 'filtersort_playlists' function.
    allCol : Default False - Returns a dataframe with only track name and id.
             True - Returns a complete dataframe of track details 
                    with all columns.
    
    showkeys : Prints all column names/keys of the complete dataframe 
    
    Returns: Dataframe with track info (Default - name and id). Playlist
             entries whose 'track' is missing/None are skipped; an empty
             playlist yields an empty dataframe with 'name' and 'id' columns.
    
    '''
    # Assumes playlist_tracks returns pages of up to 100 items (the call is
    # made without an explicit limit); the offsets below rely on it.
    track_lim = 100

    page = spotipyUserAuth.playlist_tracks(playlist_id)
    entries = list(page['items'])

    # Request only the pages that can still contain tracks: 100, 200, ... < total
    for offset in range(track_lim, page['total'], track_lim):
        next_page = spotipyUserAuth.playlist_tracks(playlist_id, offset = offset)
        entries.extend(next_page['items'])

    # Entries without a track object cannot be normalized; drop them.
    tracks_json = [entry['track'] for entry in entries if entry.get('track') is not None]

    if tracks_json:
        tracks_df = pd.json_normalize(tracks_json, sep = '_')
    else:
        tracks_df = pd.DataFrame(columns = ['name', 'id'])

    df = tracks_df if allCol else tracks_df[['name', 'id']]

    if showkeys:
        print('Info keys are :', tracks_df.columns)

    return df
    

In [9]:
def track_genre(spotipyUserAuth, album_ids):
    
    '''
    Look up the genres of albums, requesting them in batches.

    spotipyUserAuth : spotipy object from 'spotipy_userauth' function.
    album_ids : list of album ids. If a single album id is provided, it needs to be wrapped
                in a list. Duplicates and None entries are ignored.
    Returns : List of tuples of Name and Genre of albums. Albums with an
              empty genre list are left out.
    '''
   
    # Remove repeating ids (keeping first-seen order) and missing ids
    unique_ids = [album_id for album_id in dict.fromkeys(album_ids) if album_id is not None]
    tot_albums = len(unique_ids)
    print('total albums', tot_albums)
    # Limit of number of albums spotipy takes in its method 'albums'
    album_lim = 20

    album_genre = []

    for start_idx in range(0, tot_albums, album_lim):
        batch = unique_ids[start_idx: start_idx + album_lim]
        album_details = spotipyUserAuth.albums(batch)['albums']

        for album in album_details:
            # Skip entries the API could not resolve and albums without genres
            if album and album['genres']:
                album_genre.append((album['name'], album['genres']))

    return album_genre

In [10]:
def extract_tracks_analysis(spotipyUserAuth, tracksid, showkeys = False):
    
    '''
    Request the audio analysis of every track id, one call per track.

    spotipyUserAuth : spotipy object from 'spotipy_userauth' function.
    tracksid : list of track ids.
    showkeys : Default False. True prints the dictionary keys of the
               first analysis.
    returns : list of dictionaries containing track analysis, in the
              order of tracksid
    '''
    tracks_analysis = []
    for track_id in tracksid:
        tracks_analysis.append(spotipyUserAuth.audio_analysis(track_id))

    if showkeys == True:
        first_analysis = tracks_analysis[0]
        print(first_analysis.keys())

    return tracks_analysis

In [11]:
def track_anlaysis_to_df(trackid = None, spotipyUserAuth = None,\
                      track_analysis = None):
    
    '''
    Turn one track's analysis dictionary into dataframes of beats, bars,
    segments and sections.
    
    trackid : Spotify track id. When given, the analysis is fetched with
              'extract_tracks_analysis' and replaces track_analysis.
    spotipyUserAuth : Spotipy auth object. Required if using track id
    track_analysis : Track analysis dictionary of a single track if trackid is not provided
    
    Returns : track overview (dictionary) and dataframes of beats, 
              bars, segments and sections
    Raises : TypeError when trackid is given without spotipyUserAuth.
    '''
    
    if trackid is not None:
        if spotipyUserAuth is None:
            raise TypeError('Need spotipy authorized object')
        fetched = extract_tracks_analysis(spotipyUserAuth, [trackid])
        track_analysis = fetched[0]

    trackoverview = track_analysis['track']

    # Tatums are not needed currently; the remaining parts become dataframes.
    beats_df, bars_df, segments_df, sections_df = (
        json_normalize(track_analysis[part], sep ='_')
        for part in ('beats', 'bars', 'segments', 'sections')
    )

    return trackoverview, beats_df, bars_df, segments_df, sections_df

In [12]:
def convert_time(secs):
    '''
    Converts seconds to a 'mm:ss:cc' string, where cc are hundredths of a
    second (e.g. 221.8379 -> '03:41:84').

    secs : duration in seconds (number). Missing values (NaN/None) are
           returned as float NaN.

    Returns : formatted string, or NaN for missing input.
    '''
    if pd.isna(secs):
        return float('NaN')

    # Round once on the whole value so a fraction such as .996 carries into
    # the seconds instead of producing '100' hundredths, and so float noise
    # (0.29 * 100 == 28.999...) is not truncated.
    total_hundredths = int(round(secs * 100))
    whole_secs, hundredths = divmod(total_hundredths, 100)
    minutes, seconds = divmod(whole_secs, 60)

    return '{:0>2d}:{:0>2d}:{:0>2d}'.format(minutes, seconds, hundredths)

In [13]:
def tracks_analysis_(spotipyUserAuth, playlist_id):
    
    '''
    Fetch the raw audio analysis of every track in a playlist.

    spotipyUserAuth : Spotipy auth object.
    playlist_id : playlist id  
    *user functions extract_tracks and extract_tracks_analysis used here.
    
    Returns : a dict mapping track name (string) to that track's analysis
              dictionary. Tracks sharing a name keep only the last analysis.
    '''
    # extract_tracks returns a dataframe with 'name' and 'id' columns
    playlist_tracks = extract_tracks(spotipyUserAuth, playlist_id)
    names = list(playlist_tracks['name'])
    ids = list(playlist_tracks['id'])

    # one analysis dictionary per track id, in the same order
    analyses = extract_tracks_analysis(spotipyUserAuth, ids)

    return dict(zip(names, analyses))

In [14]:
def get_segments(track_analysis, segments=True, min_conf=0.5, min_dur=0.25, tempo=True, \
                 sections=False, beats=False, bars=False, min_segments=100):
    '''
    Get segments of a track with conditions on minimum confidence 
    and minimum duration of a segment. Since we are currently interested in tempo
    of a track we will be returning that value as well.
    
    track_analysis: track analysis (dict) of a track (obtained from tracks_analysis dict)
    segments: Default True. False if segments dataframe is not needed
    min_conf: minimum confidence to include a segment (range 0-1)
    min_dur : minimum duration/length in secs to include a segment
    tempo: Default True. False if tempo value is not needed
    sections: Default False. True if sections dataframe needs to be returned
    beats: Default False. True if beats dataframe needs to be returned
    bars: Default False. True if bars dataframe needs to be returned
    min_segments: Default 100. Both thresholds are lowered in steps of 0.05
                  until at least this many segments pass, or until both
                  thresholds are negative (nothing more can be gained).
    
    Returns: list with, in this order, the requested items among: tempo
             dataframe, segments dataframe, sections_df, beats_df, bars_df
             of a single track
    '''
    
    trackoverview, beats_df, bars_df, segments_df, sections_df = \
        track_anlaysis_to_df(track_analysis = track_analysis)
    
    # Built only on request; the placeholder keeps the output mask below valid.
    tempo_df = pd.DataFrame({ 'tempo' : [trackoverview['tempo']]}) if tempo else None
    
    # Introducing start_minute column for more readability of start time in min:sec format
    start_minute = segments_df['start'].map(convert_time)
    segments_df.insert(1,'start_minute', start_minute)

    def _passing(conf, dur):
        # Segments strictly above both thresholds
        return segments_df[(segments_df['confidence'] > conf) & (segments_df['duration'] > dur)]

    segments_df_ = _passing(min_conf, min_dur)
    
    # Stop relaxing once both thresholds are negative: a track with fewer
    # than min_segments segments would otherwise loop forever.
    while len(segments_df_) < min_segments and (min_conf >= 0 or min_dur >= 0):
        min_conf = min_conf - 0.05
        min_dur = min_dur - 0.05
        segments_df_ = _passing(min_conf, min_dur)
    
    segments_df_ = segments_df_[['start', 'start_minute', 'duration', 'confidence', 'pitches', 'timbre']]
    
    # pair every request flag with its dataframe and keep the requested ones
    requested = zip([tempo, segments, sections, beats, bars],
                    [tempo_df, segments_df_, sections_df, beats_df, bars_df])
    output = [frame for flag, frame in requested if flag]
    
    return output

In [15]:
def get_playlist_analysis(spotipyUserAuth, playlist_id, segments=True, min_conf=0.5, \
                          min_dur=0.25, tempo=True, sections=False, beats=False, bars=False):
    
    '''
    Run 'get_segments' on every track of a playlist.

    spotipyUserAuth : Spotipy auth object.
    playlist_id : playlist id
    segments and tempo: Default True. False if not needed
    min_conf: minimum confidence to include a segment (range 0-1)
    min_dur : minimum duration/length in secs to include a segment
    sections/beats/bars: Default False. True if needs to be returned
    
    Returns : a dict with key/value pairs for all tracks in the playlist 
                Keys: name of track, with the characters * | > < : " ? / \\ removed
                Value: list containing tempo and segment dataframe of the track 
                       (and sections/beats/bars if asked)
    '''
    
    playlist_tracks = extract_tracks(spotipyUserAuth, playlist_id)
    names = list(playlist_tracks['name'])
    ids = list(playlist_tracks['id'])
    # one analysis dictionary per track id
    analyses = extract_tracks_analysis(spotipyUserAuth, ids)

    # options forwarded unchanged to get_segments for every track
    segment_options = dict(segments=segments, min_conf=min_conf, min_dur=min_dur,
                           tempo=tempo, sections=sections, beats=beats, bars=bars)

    playlist_analysis = {}
    for raw_name, analysis in zip(names, analyses):
        # strip characters that may cause issues when the name is used in filenames
        safe_name = re.sub(r'[*|><:"?/]|\\',"", raw_name)
        playlist_analysis[safe_name] = get_segments(analysis, **segment_options)

    return playlist_analysis

In [16]:
def get_folder_analysis(spotipyUserAuth, filsort_pl, segments=True, min_conf=0.5, \
                          min_dur=0.25, tempo=True, sections=False, beats=False, bars=False):
    
    '''
    Run 'get_playlist_analysis' on every playlist of a filtered and sorted
    list. Future edit should take user playlist names and id.

    spotipyUserAuth : Spotipy auth object.
    filsort_pl : list of tuples as returned by 'filtersort_playlists';
                 element 1 is read as the playlist name and element 2 as its id.
    remaining arguments : forwarded to 'get_playlist_analysis'.

    Returns: a dict with key/value pairs for all playlists in the folder.
             Key : Name of the playlist (string), with the characters
                   * | > < : " ? / \\ removed
             Value : a dict of track analysis of all tracks from the playlist 
             (Values are returned from get_playlist_analysis)
    '''
    
    analysis_options = dict(segments=segments, tempo=tempo, min_conf=min_conf,
                            min_dur=min_dur, sections=sections, beats=beats, bars=bars)
    folder_analysis = {}

    for playlist in filsort_pl:
        raw_name, playlist_id = playlist[1], playlist[2]
        # strip characters that may cause issues when the name is used in filenames
        safe_name = re.sub(r'[*|><:"?/]|\\',"", raw_name)
        folder_analysis[safe_name] = get_playlist_analysis(spotipyUserAuth, playlist_id=playlist_id,
                                                           **analysis_options)

    return folder_analysis

In [17]:
# Module-level client; get_pl_details reads this global by name.
sp = spotipy_userauth(USERNAME)

In [18]:
# Names, ids, track-listing urls and track counts of the user's playlists.
pl_Name_, pl_ID_, pl_URL_, pltot_Tracks_ = get_pl_details(USERNAME)

# Show the first 23 playlist names and the track count of every playlist.
print(pl_Name_[:23])
print(pltot_Tracks_[:])

["Today's Top Hits", 'Chill Beats Weekly 🦔 groove, relax, study (instrumental lofi | chillhop | jazzhop)', 'Deeper House', 'Deep house', 'Progressive House', 'Mo House lo Trance', 'Our old school trance 138', 'Our old school trance 3', 'Our old school trance 2', 'Our old school trance', 'That familiar trance', 'Deep Trance', 'Classic progressive', 'Housy beats', 'TBD', 'Chill out', 'Progressive 8', 'Progressive 7', 'Progressive 6', 'Progressive 5.2', 'Progressive 5', 'Progressive 3', 'Progressive 2']
[50, 142, 11, 16, 15, 42, 14, 20, 8, 27, 61, 4, 6, 3, 70, 11, 18, 8, 28, 18, 19, 8, 6, 14, 83, 6, 154, 4, 124, 1, 2, 14, 50, 21, 18, 141, 128, 85, 31, 32, 1, 20, 2, 18, 4, 15, 4, 8, 15, 65]


In [19]:
# NOTE(review): key_words is defined but not passed to filtersort_playlists
# below, so the positional range is what selects the playlists - confirm intent.
key_words = ['Progressive']
# Keep playlists at positions 1 to 22 and sort them by track count.
filsort_pl = filtersort_playlists(pl_Name_, pl_ID_, pl_URL_, pltot_Tracks_, start = 1, pl_range = 23)
filsort_pl

[(142,
  'Chill Beats Weekly 🦔 groove, relax, study (instrumental lofi | chillhop | jazzhop)',
  '2rN3mSrzUcgjlj1TcEDTX7',
  'https://api.spotify.com/v1/playlists/2rN3mSrzUcgjlj1TcEDTX7/tracks'),
 (70,
  'TBD',
  '2YjqtjD98B6fGX55JBzybN',
  'https://api.spotify.com/v1/playlists/2YjqtjD98B6fGX55JBzybN/tracks'),
 (61,
  'That familiar trance',
  '3PH2J5HkKhhMoxWj3W0jk8',
  'https://api.spotify.com/v1/playlists/3PH2J5HkKhhMoxWj3W0jk8/tracks'),
 (42,
  'Mo House lo Trance',
  '2RnR5cw9kJUc9onu4WSRrW',
  'https://api.spotify.com/v1/playlists/2RnR5cw9kJUc9onu4WSRrW/tracks'),
 (28,
  'Progressive 6',
  '53kl8WegufR0IqMgyklXEL',
  'https://api.spotify.com/v1/playlists/53kl8WegufR0IqMgyklXEL/tracks'),
 (27,
  'Our old school trance',
  '7HUclibRF0h57pVvXr3g9v',
  'https://api.spotify.com/v1/playlists/7HUclibRF0h57pVvXr3g9v/tracks'),
 (20,
  'Our old school trance 3',
  '43sMbiw98RFTH2WjmbTidr',
  'https://api.spotify.com/v1/playlists/43sMbiw98RFTH2WjmbTidr/tracks'),
 (19,
  'Progressive 5',
  '

In [20]:
# Analyse the selected playlists, leaving out the first two and last two
# entries of the sorted list.
folder_analysis_dict = get_folder_analysis(sp, filsort_pl[2:-2])

In [21]:
# Playlist names that were analysed.
folder_analysis_dict.keys()

dict_keys(['That familiar trance', 'Mo House lo Trance', 'Progressive 6', 'Our old school trance', 'Our old school trance 3', 'Progressive 5', 'Progressive 8', 'Progressive 5.2', 'Deep house', 'Progressive House', 'Our old school trance 138', 'Deeper House', 'Chill out', 'Progressive 7', 'Progressive 3', 'Our old school trance 2', 'Progressive 2', 'Classic progressive'])

In [22]:
# Track names analysed for one playlist.
folder_analysis_dict['That familiar trance'].keys()

dict_keys(['Erhu (ASOT 753) - Ost & Meyer Remix', 'Lucid Dreaming - Radio Edit', 'The Descent', 'Tuviana - Extended Mix', '24 Hours', 'Rebirth - Ahmed Romel Remix', 'Distorted Truth', 'Unbreakable Mirror', 'Kunai - Radio Edit', 'Farewell To The Moon - Alexander Popov Radio Edit', 'Hidden In A Smile - Dub Mix', 'Accelerate - Radio Edit', 'Athena', 'Sutra - Original Mix', 'Stratosphere', 'Mega', 'Isla', 'Frontier', 'Monarch', 'We Are True (Radio Edit)', 'Irufushi - Radio Edit', 'Color Field', 'Godless - Protoculture Remix', 'Solar - Extended Mix', 'Caesarea', 'Erhu - Ost & Meyer Remix', 'I Be - Radio Edit', 'Night Sky (Radio Edit)', 'Night Sky', 'Alien', 'Moonbow - Extended Mix', 'Shanghai - Radio Edit', 'New Memories - Original Mix', 'Sansa', 'Goldengate', 'Skyfire - Original Mix', 'No Hesitation', 'Alpha', 'Gone - Radio Edit', 'XO', 'Agera', 'Mind Over Time [ABGT179]', "You'll Know", 'Zero - Sergey Shemet Remix', 'London Sunrise - Radio Edit', 'Alpha Centauri', 'Endorphin', 'Cadence', 

In [23]:
# Second item of a track's output list; with the default flags (tempo and
# segments only) this is the filtered segments dataframe.
folder_analysis_dict['That familiar trance']['Kunai - Radio Edit'][1]

Unnamed: 0,start,start_minute,duration,confidence,pitches,timbre
2,0.61247,00:00:61,0.32630,0.594,"[0.305, 1.0, 0.187, 0.007, 0.002, 0.003, 0.016...","[31.811, -143.666, -99.799, -65.984, -43.839, ..."
10,2.07751,00:02:08,0.26236,0.799,"[0.459, 1.0, 0.198, 0.031, 0.004, 0.005, 0.02,...","[42.934, -9.745, -42.17, -37.846, -28.843, 6.0..."
12,2.52585,00:02:53,0.28834,0.795,"[0.309, 1.0, 0.208, 0.057, 0.01, 0.024, 0.039,...","[45.571, 12.64, -40.96, -47.414, 1.864, -12.34..."
14,2.97551,00:02:98,0.30413,0.893,"[0.393, 1.0, 0.082, 0.075, 0.025, 0.022, 0.036...","[43.15, -41.084, -48.975, -85.389, -9.933, -22..."
29,5.81361,00:05:81,0.27841,1.000,"[0.399, 1.0, 0.205, 0.027, 0.003, 0.005, 0.019...","[39.258, -50.311, -42.747, -114.322, -19.697, ..."
...,...,...,...,...,...,...
863,221.83790,03:41:84,0.46889,0.521,"[0.078, 0.218, 1.0, 0.155, 0.105, 0.095, 0.126...","[49.683, 53.498, 2.285, -28.662, 46.563, -42.2..."
877,225.58770,03:45:59,0.25556,0.736,"[0.384, 0.77, 1.0, 0.76, 0.764, 0.586, 0.547, ...","[26.495, 217.56, 127.568, -9.847, -6.225, 17.8..."
879,226.02884,03:46:03,0.50485,0.738,"[0.502, 0.76, 1.0, 0.827, 0.874, 0.831, 0.665,...","[21.039, 242.154, 142.436, -22.68, -8.945, 22...."
882,226.95741,03:46:96,0.50522,0.764,"[0.677, 0.612, 0.957, 0.833, 0.946, 1.0, 0.713...","[14.612, 273.154, 159.746, -14.913, -22.189, 1..."


In [24]:
def create_dataset(folder_analysis, dataset_dir=None):
    '''
    Creates one folder per playlist and writes every track's analysis
    dataframes into it as parquet files named '<track>_<dataframe>.parquet'.

    folder_analysis : dict as returned by 'get_folder_analysis'
                      (playlist name -> track name -> list of dataframes).
    dataset_dir : directory to write into. Default None uses the 'Dataset'
                  directory next to the current working directory. Missing
                  directories are created.

    NOTE(review): dataframes are labelled by position as tempo, segments,
    sections, beats, bars. That matches the list order only when no earlier
    item was switched off in 'get_segments' - confirm for non-default flags.
    '''
    
    # Path to 'Dataset' dir
    if dataset_dir is None:
        p = Path.cwd().parent.joinpath('Dataset')
    else:
        p = Path(dataset_dir)
    # list of dataframe names in output 
    df_names = ['tempo', 'segments', 'sections', 'beats', 'bars']
    
    for playlist_name, playlist_tracks in folder_analysis.items():
        
        path_ = p.joinpath('{}'.format(playlist_name))
        # parents=True so a missing 'Dataset' directory does not abort the run
        path_.mkdir(parents=True, exist_ok=True) 
        
        for track, frames in playlist_tracks.items():
            
            for df_name, frame in zip(df_names, frames):
            
                frame.to_parquet(path_.joinpath('{}_{}.parquet'.format(track, df_name)), engine = 'pyarrow')
        

In [1]:
#create_dataset(folder_analysis_dict)

In [26]:
"""
dict_sampl_ =list(folder_analysis_dict.items())[-2:]
dict_sample ={ dict_sampl_[0][0] : dict_sampl_[0][1], dict_sampl_[1][0] : dict_sampl_[1][1] }
dict_sample['Classic progressive']['Skylarking - Original Mix'][0]
"""

"\ndict_sampl_ =list(folder_analysis_dict.items())[-2:]\ndict_sample ={ dict_sampl_[0][0] : dict_sampl_[0][1], dict_sampl_[1][0] : dict_sampl_[1][1] }\ndict_sample['Classic progressive']['Skylarking - Original Mix'][0]\n"

In [2]:
cell1 = 1

SyntaxError: invalid syntax (<ipython-input-1-8f5ec6d46e39>, line 1)