---
## 0. Imports

In [1]:
from bs4 import BeautifulSoup
import http.client
import urllib
import os

import pandas as pd
import numpy as np
import csv

import dateutil.parser
import datetime
import sys

import spotipy
import spotipy.util

In [2]:
filepath = ('./prev-chart-crawls'
            + '/pull_D-2019-11-19_T-19-56-59-692877'
            + '/Result_D-2019-11-19_T-19-56-59-693112_FrmD-2017-01-01_ToD-2019-11-18.csv')

chartCrawl_df = pd.read_csv(filepath, index_col=0)
chartCrawl_df['Date'] = pd.to_datetime(chartCrawl_df['Date'], yearfirst=True)

---
## 1. Spotify API

### 1.1. Connection

In [3]:
client_id = '987f8aab8f804962a2f19a86e310905c'
client_secret = 'bdb457608ae84339ad7d3c41696cf10e'
cred_manager = spotipy.util.oauth2.SpotifyClientCredentials(client_id, client_secret)

In [4]:
sp = spotipy.Spotify(client_credentials_manager=cred_manager)

### 1.2. Contents

In [5]:
def gen_type_str(obj, r_pat=' '):
    type_str = str(type(obj)).rpartition(r_pat)[2][:-1].replace('\'', '').strip()
    if isinstance(obj, str) and ('http' in obj):
        return '(url str)'
    else:
        return '('+type_str+')'

def print_dict_keys(print_arg, lvl=0, spc_mult=5, r_mrgn=50):
    if isinstance(print_arg, list) and lvl==0:
        print_arg = print_arg[0]
    for key, item in print_arg.items():
        indt_lvl = (spc_mult * lvl)
        key_spacing = ' '*indt_lvl
        type_spacing = ' '*(r_mrgn-len(key)-indt_lvl)
        print(key_spacing, key, type_spacing, gen_type_str(item))
        if isinstance(item, dict):
            print_dict_keys(item, lvl+1)
        else:
            continue

In [6]:
sf_track_id = '3JWiDGQX2eTlFvKj3Yssj3'
sf_album_id = sp.track(sf_track_id)['album']['id']
fmt_str = '{:_^70}'
sp_api_dict = {
    'TRACK DETAILS':[sp.track, sf_track_id],
    'AUDIO FEATURES':[sp.audio_features, sf_track_id],
    'AUDIO ANALYSIS':[sp.audio_analysis, sf_track_id],
    'ALBUM DETAILS':[sp.album, sf_album_id],
}
for sp_name, sp_list in sp_api_dict.items():
    print('\n', fmt_str.format(sp_name), '\n')
    print_dict_keys(sp_list[0](sp_list[1]))


 ____________________________TRACK DETAILS_____________________________ 

 album                                               (dict)
      album_type                                     (str)
      artists                                        (list)
      available_markets                              (list)
      external_urls                                  (dict)
           spotify                                   (url str)
      href                                           (url str)
      id                                             (str)
      images                                         (list)
      name                                           (str)
      release_date                                   (str)
      release_date_precision                         (str)
      total_tracks                                   (int)
      type                                           (str)
      uri                                            (str)
 artists                   

---
## 2. Helper Methods

In [7]:
def gen_internalTrackId_df(chartCrawl_df_arg):
    """Returns a DataFrame containing all unique internal track ids."""
    interalId_srs = chartCrawl_df_arg['Spotify_URL'].str.rpartition('/')[2].unique()
    return pd.DataFrame(interalId_srs, columns=['Internal Track ID'], dtype=str)

In [8]:
internalTrackId_df = gen_internalTrackId_df(chartCrawl_df)
internalTrackId_df

Unnamed: 0,Internal Track ID
0,4Km5HrUvYTaSUfiSGPJeQR
1,343YBumqHu19cGoGARUTsd
2,5aAx2yezTd8zXrkmtKl66Z
3,7BKLCZ1jbUBVqRi2FVlTVw
4,6fujklziTHa8uoM5OQSfIo
5,7yyRTcZmCiyzzJlNzGC9Ol
6,1xznGGDReH1oQq0xzbwXa3
7,7FB8l7UA1HKqnuSLjP9qDc
8,4pdPtRcBmOSQDlJ3Fk945m
9,0SGkqnVQo9KPytSri1H6cF


In [33]:
sp.track('04DwTuZ2VBdJCCC5TROn7L').keys()

dict_keys(['album', 'artists', 'available_markets', 'disc_number', 'duration_ms', 'explicit', 'external_ids', 'external_urls', 'href', 'id', 'is_local', 'name', 'popularity', 'preview_url', 'track_number', 'type', 'uri'])

---
## 3. API Methods

In [37]:
def get_details(unq_id_row):
    spotify_id = unq_id_row['Spotify_ID']
    details_dict = sp.track(spotify_id)
    return {'Spotify_ID':internalTrackId, 'Track Details Obj':trackDetails_dict}

In [38]:
def get_features(unq_id_row):
    spotify_id = unq_id_row['Spotify_ID']
    features_list = sp.audio_features(spotify_id)
    features_dict = features_list[0]
    return {'Spotify_ID':spotify_id, 'Audio Features Obj':features_dict}

In [39]:
def gen_trackInfo_df(unq_id_df,
                     get_track_details=False,
                     get_audio_features=False):
    """Returns and saves to csv the pd.DataFrames of track details."""
    trackDetails_df = unq_id_df.apply(get_trackDetails, 
                                        axis=1, 
                                        result_type='expand') if get_track_details else None
    
    audioFeatures_df = unq_id_df.apply(get_audioFeatures, 
                                         axis=1, 
                                         result_type='expand') if get_audio_features else None
    
    return (trackDetails_df, audioFeatures_df)

In [40]:
def get_newest_dirpath(data_dirpath):
    dir_contents = [d for d in os.listdir(data_dirpath) if d.startswith('D-')]
    dir_contents.sort(reverse = True)
    dir_name = dir_contents[0]
    return os.path.join(data_dirpath, dir_name)

In [41]:
def extract_details(details_df):
    tgt_details = [
        'id', 'external_ids', 'uri',  'name', 'album', 'artists', 
        'available_markets', 'duration_ms', 'explicit', 'track_number'
    ]
    for detail in tgt_details:
        details_df[detail] = details_df['Track Details Obj'].apply(lambda d: d[detail])
    return details_df

In [42]:
def extract_features(features_df):
    tgt_features = [
        'id', 'uri',
        'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 
        'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature'
    ]
    for feature in tgt_features:
        features_df[feature] = features_df['Audio Features Obj'].apply(lambda a: a[feature])
    return features_df

In [43]:
def pull_api_data(
    pull_dirpath=None,
    in_csv_file=False,
    get_details=False,
    get_features=False):
    
    pull_dirpath = get_newest_dirpath('./data') if (pull_dirpath is None) else pull_dirpath
    results_filepath = [d for d in os.listdir(pull_dirpath) if d.startswith('Result')][0]
    
    charts_df = pd.read_csv(os.path.join(pull_dirpath, results_filepath), index_col=0)
    unq_id_df = pd.DataFrame({'Spotify_ID':charts_df['Spotify_ID'].unique()})
    
    api_dfs_tup = gen_trackInfo_df(unq_id_df, get_details, get_features)
    
    return extract_features(api_dfs_tup[1])

In [44]:
pull_api_data(
    pull_dirpath=None,
    in_csv_file=False,
    get_details=True,
    get_features=True)

retrying ...1secs
retrying ...1secs


Unnamed: 0,Audio Features Obj,Spotify_ID,id,uri,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,"{'danceability': 0.927, 'energy': 0.665, 'key'...",4Km5HrUvYTaSUfiSGPJeQR,4Km5HrUvYTaSUfiSGPJeQR,spotify:track:4Km5HrUvYTaSUfiSGPJeQR,0.927,0.665,11,-5.313,1,0.2440,0.06100,0.000000,0.1230,0.1750,127.076,343150,4
1,"{'danceability': 0.928, 'energy': 0.481, 'key'...",343YBumqHu19cGoGARUTsd,343YBumqHu19cGoGARUTsd,spotify:track:343YBumqHu19cGoGARUTsd,0.928,0.481,9,-9.350,0,0.2870,0.10500,0.000000,0.1760,0.6130,134.007,210937,4
2,"{'danceability': 0.681, 'energy': 0.594, 'key'...",5aAx2yezTd8zXrkmtKl66Z,5aAx2yezTd8zXrkmtKl66Z,spotify:track:5aAx2yezTd8zXrkmtKl66Z,0.681,0.594,7,-7.028,1,0.2820,0.16500,0.000003,0.1340,0.5350,186.054,230453,4
3,"{'danceability': 0.748, 'energy': 0.524, 'key'...",7BKLCZ1jbUBVqRi2FVlTVw,7BKLCZ1jbUBVqRi2FVlTVw,spotify:track:7BKLCZ1jbUBVqRi2FVlTVw,0.748,0.524,8,-5.599,1,0.0338,0.41400,0.000000,0.1110,0.6610,95.010,244960,4
4,"{'danceability': 0.794, 'energy': 0.632, 'key'...",6fujklziTHa8uoM5OQSfIo,6fujklziTHa8uoM5OQSfIo,spotify:track:6fujklziTHa8uoM5OQSfIo,0.794,0.632,0,-6.163,1,0.0649,0.14200,0.000000,0.1280,0.3550,145.926,291893,4
5,"{'danceability': 0.886, 'energy': 0.525, 'key'...",7yyRTcZmCiyzzJlNzGC9Ol,7yyRTcZmCiyzzJlNzGC9Ol,spotify:track:7yyRTcZmCiyzzJlNzGC9Ol,0.886,0.525,8,-7.390,1,0.1310,0.23600,0.000000,0.0570,0.7080,145.990,225205,4
6,"{'danceability': 0.791, 'energy': 0.619, 'key'...",1xznGGDReH1oQq0xzbwXa3,1xznGGDReH1oQq0xzbwXa3,spotify:track:1xznGGDReH1oQq0xzbwXa3,0.791,0.619,1,-5.886,1,0.0532,0.00784,0.004230,0.3510,0.3710,103.989,173987,4
7,"{'danceability': 0.952, 'energy': 0.318, 'key'...",7FB8l7UA1HKqnuSLjP9qDc,7FB8l7UA1HKqnuSLjP9qDc,spotify:track:7FB8l7UA1HKqnuSLjP9qDc,0.952,0.318,10,-10.357,1,0.4670,0.17400,0.000000,0.2050,0.6650,120.077,209640,4
8,"{'danceability': 0.476, 'energy': 0.718, 'key'...",4pdPtRcBmOSQDlJ3Fk945m,4pdPtRcBmOSQDlJ3Fk945m,spotify:track:4pdPtRcBmOSQDlJ3Fk945m,0.476,0.718,8,-5.309,1,0.0576,0.07840,0.000010,0.1220,0.1420,199.864,205947,4
9,"{'danceability': 0.78, 'energy': 0.574, 'key':...",0SGkqnVQo9KPytSri1H6cF,0SGkqnVQo9KPytSri1H6cF,spotify:track:0SGkqnVQo9KPytSri1H6cF,0.780,0.574,1,-5.628,0,0.1410,0.10400,0.000000,0.1290,0.2730,81.502,222360,4


---
## ... Testing

In [45]:
trackInfo_tup = gen_trackInfo_df(chartCrawl_df[chartCrawl_df['Artist']=='Drake'],
                                 get_track_details=True,
                                 get_audio_features=True,
                                 get_audio_analysis=False)

TypeError: gen_trackInfo_df() got an unexpected keyword argument 'get_audio_analysis'

In [None]:
drake_details_df = trackInfo_tup[0]

In [None]:
tgt_features = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 
                'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']

In [None]:
drake_features_df = pd.DataFrame(trackInfo_tup[1], copy=True)
for feature in tgt_features:
    drake_features_df[feature] = drake_features_df['Audio Features Obj'].apply(lambda a: a[feature])
drake_features_df.set_index('Internal Track ID')
drake_features_df.to_csv('./drake_audio_features.csv')

In [None]:
talk_up_df = drake_details_df[(drake_details_df['Internal Track ID']=='3Yw09dj3cTXsAzlLjgBfIP') |
                 (drake_details_df['Internal Track ID']=='4ksuI04WMvUnJbHQjgs3L5')]
talk_up_df

In [None]:
gods_plan_df = drake_details_df[drake_details_df['Internal Track ID'].isin(
    ['2XW4DbS6NddZxRPm5rMCeY', '6T8cJz5lAqGer9GUHGyelE', '2VWbHHhWnMzKWPUs4IEEW9', '6DCZcSspjsKoFjzjrWoCdn'] 
)]
gods_plan_df

In [None]:
dont_talk_df = drake_details_df[drake_details_df['Internal Track ID'].isin(
    ['36ONiya0OANYknz0GgJmwB', '6G8kHiVZ1jW7vHMPVRNZU0'] 
)]
dont_talk_df

In [None]:
tgt_keys_arr = ['disc_number', 'duration_ms', 'explicit', 'external_ids', 
                'external_urls', 'href', 'id', 'name', 'preview_url', 'track_number', 'uri']

def print_track_compares(track_df, keys_arr=tgt_keys_arr):
    print('----------\n')
    for track_idx, details_obj in track_df['Track Details Obj'].items():
        print('track_idx:\t', track_idx, '\n')
        for tgt_key in tgt_keys_arr:
            print(tgt_key + ':\t\t', details_obj[tgt_key])
        print('\n----------\n')

In [None]:
print_track_compares(talk_up_df)

In [None]:
print_track_compares(gods_plan_df)

In [None]:
print_track_compares(dont_talk_df)

In [None]:
trackInfo_tup[1]

In [None]:
trackInfo_tup[2]

In [None]:
sp.audio_analysis('2QpGZOhTCHHiKmpSO9FW4h')['track'].keys()

In [None]:
sp.audio_features('2QpGZOhTCHHiKmpSO9FW4h')[0].keys()

In [None]:
sp.audio_analysis('https://open.spotify.com/track/2QpGZOhTCHHiKmpSO9FW4h')

In [None]:
sp.track('https://open.spotify.com/track/2QpGZOhTCHHiKmpSO9FW4h')

In [None]:
search_result = sp.search(q='Dance Monkey Tones and I', limit=1, type='track', market='US')
search_result['tracks']['items'][0].keys()

In [None]:
sp.search(q='Dance Monkey Tones and I', limit=3, type='track', market='US')

In [None]:
drake_url_test1 = sp.track('0w1ZtnzQmtmuuoKxHT0pLL')
drake_url_test1

In [None]:
drake_url_test2 = sp.track('4HG1YiGBseVKzjyKcmAJen')
drake_url_test2

In [None]:
keys_list = list(drake_url_test1.keys())
for key in keys_list:
    print(key)
    print(drake_url_test1[key], '\n')

In [None]:
pd.set_option('display.max_colwidth', -1)
pd.reset_option('display.max_rows')
drake_url_test1_list = [drake_url_test1[key] for key in drake_url_test1.keys()]
drake_url_test2_list = [drake_url_test2[key] for key in drake_url_test2.keys()]
test_df = pd.DataFrame({'0w1ZtnzQmtmuuoKxHT0pLL':drake_url_test1_list, 
                        '4HG1YiGBseVKzjyKcmAJen':drake_url_test2_list},
                       index=drake_url_test1.keys())
test_df['Same val?'] = test_df['0w1ZtnzQmtmuuoKxHT0pLL']==test_df['4HG1YiGBseVKzjyKcmAJen']

In [None]:
test_df

In [None]:
drake_url_test1['id'] == drake_url_test2['id']

In [None]:
drake_url_test1['href'] == drake_url_test2['href']

In [None]:
drake_url_test1['href']

In [None]:
drake_url_test1['external_urls'] == drake_url_test2['external_urls']

In [None]:
drake_url_test1['external_urls']

In [None]:
drake_url_test2['external_urls']

In [None]:
drake_url_test1['external_urls'] == drake_url_test2['external_urls']