# Working with Spotify tracks
Getting information about Spotify tracks and running basic operations with tracks.

## Import statements

In [66]:
from my_spotify import get_spotify_object, get_all_tracks_from_playlist
import pandas as pd

## Get the Spotify object

In [38]:
# Spotify object used by the cells below; the argument is the relative path to the .env file
# (see spotify_authentication.ipynb for how that file is created).
spot = get_spotify_object('env/.env')

## Specify a playlist to work with

In [39]:
# Share link of the playlist that all the following cells in this notebook work on.
THE_BEATLES_PLAYLIST_URL_1 = 'https://open.spotify.com/playlist/6ZmqDRJKJf3v3LzYZAaGGU?si=6b487ba5f9b54374' # 215 songs

## Get the tracks from the playlist

In [40]:
# Fetch the playlist's items: a list holding one dictionary per playlist entry.
tracks = get_all_tracks_from_playlist(THE_BEATLES_PLAYLIST_URL_1, 'env/.env')

## Inspect some items of the "encompassing" dictionary that corresponds to a track
This "encompassing" dictionary includes an "embedded" dictionary pertaining to the track itself, as well as some other metadata.

In [41]:
# type(tracks)                          # list

# First playlist item: the "encompassing" dictionary (the track plus playlist-level metadata).
t = tracks[0]
# type(t)                               # dict

display(t.keys())
display(t['added_at'])
display(t['is_local'])
display(t['primary_color'])
display(t['video_thumbnail'])

# The "embedded" dictionary that describes the track itself.
display(t['track'])

## Inspect some items of the "embedded" dictionary that corresponds to a track

In [42]:
# The "embedded" dictionary: the track itself, without the playlist-level metadata.
track = t['track']
# type(track)                           # dict

display(track.keys())

# Each field is shown once; the 'track' flag is displayed at the end, next to 'episode'.
display(track['name'])
display(track['popularity'])
display(track['album'])
display(track['album']['name'])
display(track['duration_ms'])
display(track['artists'][0]['name'])
display(track['id'])
display(track['uri'])
display(track['href'])
display(track['episode'])               # False
display(track['track'])                 # True

## Get the audio features for a track
See [this](https://developer.spotify.com/documentation/web-api/reference/#/operations/get-several-audio-features) for an explanation of the meanings of all these parameters.

In [43]:
# Audio features are requested by URI (passed in a list); this URI is the playlist's
# first track, "I Saw Her Standing There".
spot.audio_features(['spotify:track:3KiexfmhxHvG5IgAElmTkd'])

## Create a list of URIs of all tracks from the playlist

In [44]:
# Collect the URI of every playlist item, keeping the playlist order.
uri_list = [item['track']['uri'] for item in tracks]
# Peek at the first five URIs only.
uri_list[:5]

## Create a list of *(uri, title, album, popularity, duration)* tuples of all tracks from the playlist

In [45]:
# One (uri, title, album, popularity, duration in seconds) tuple per playlist item.
# Title and album name are cut at their "Remastered" suffix; duration_ms is rounded to whole seconds.
tracks_data = [(t['track']['uri'],
                t['track']['name'].split(' - Remastered')[0].split(' / Remastered')[0],
                t['track']['album']['name'].split(' (Remastered)')[0],
                t['track']['popularity'],
                int(round(t['track']['duration_ms'] / 1000, 0))) for t in tracks]
display(len(tracks_data))
# Show only the first few tuples; displaying the whole list floods the cell output.
display(tracks_data[:5])

## Create a list of audio features dictionaries of all tracks from the playlist
See [this](https://developer.spotify.com/documentation/web-api/reference/#/operations/get-several-audio-features) for an explanation of the meanings of all these parameters.

**Important:** `spot.audio_features()` accepts at most 100 URIs per call, so the URIs must be sent in batches!

In [46]:
# spot.audio_features() takes at most 100 URIs per call, so the URI list is sent in slices of 100.
tracks_audio_features_dicts = []

# Stepping through the list in slices never issues a request with an empty URI list,
# even when the number of tracks is an exact multiple of 100 (or zero).
for offset in range(0, len(uri_list), 100):
    tracks_audio_features_dicts.extend(spot.audio_features(uri_list[offset:(offset+100)]))

# display(len(tracks_audio_features_dicts))
# display(tracks_audio_features_dicts)

## Create a list of *(key, mode, tempo, time_signature, valence, danceability, energy, loudness, acousticness, instrumentalness, liveness, speechiness)* tuples of all tracks from the playlist
See [this](https://developer.spotify.com/documentation/web-api/reference/#/operations/get-several-audio-features) for an explanation of the meanings of all these parameters.

Use `tracks_audio_features_dicts` list created above.

In [47]:
# From every audio-features dictionary keep only the values of interest, in this fixed order.
tracks_audio_features = [
    tuple(features[name] for name in ('key', 'mode', 'tempo', 'time_signature',
                                      'valence', 'danceability', 'energy', 'loudness',
                                      'acousticness', 'instrumentalness', 'liveness', 'speechiness'))
    for features in tracks_audio_features_dicts
]

# display(len(tracks_audio_features))
# display(tracks_audio_features)

## Join the tuples from *tracks_data* and *tracks_audio_features* pairwise to create *tracks_data_and_audio_features*

In [48]:
# Concatenate each track's data tuple with the audio-features tuple at the same position.
tracks_data_and_audio_features = [
    data + features
    for data, features in zip(tracks_data, tracks_audio_features)
]
# display(tracks_data_and_audio_features[:5])

## Bundle it all together in the *get_playlist_tracks_data()*, *get_playlist_tracks_audio_features()* and *get_playlist_tracks_df()* functions
Assumption: the *.env* file is already created as explained in *spotify_authentication.ipynb*, and its relative path is passed as an argument.

### *get_playlist_tracks_data()*

In [72]:
def get_playlist_tracks_data(playlist_id: str, env_file_path: str) -> list:
    """Return one (uri, title, album, popularity, duration) tuple per playlist item.

    Args:
        playlist_id: Playlist identifier, passed unchanged to get_all_tracks_from_playlist().
        env_file_path: Path to the .env file, passed unchanged to get_all_tracks_from_playlist().

    Returns:
        A list of tuples in the order the items were returned. The title is cut at the
        first ' - Remastered' and ' / Remastered' marker, the album name at the first
        ' (Remastered)' marker, and the duration is 'duration_ms' rounded to whole seconds.
    """
    rows = []
    for item in get_all_tracks_from_playlist(playlist_id, env_file_path):
        track = item['track']
        uri = track['uri']
        title = track['name'].split(' - Remastered')[0].split(' / Remastered')[0]
        album = track['album']['name'].split(' (Remastered)')[0]
        popularity = track['popularity']
        seconds = int(round(track['duration_ms'] / 1000, 0))
        rows.append((uri, title, album, popularity, seconds))
    return rows

In [57]:
# Test get_playlist_tracks_data()
display(get_playlist_tracks_data(THE_BEATLES_PLAYLIST_URL_1, 'env/.env'))

### *get_playlist_tracks_audio_features()*

In [73]:
def get_playlist_tracks_audio_features(playlist_id: str, env_file_path: str) -> list:
    """Return one audio-features tuple per playlist item, in playlist order.

    Each tuple is (key, mode, tempo, time_signature, valence, danceability, energy,
    loudness, acousticness, instrumentalness, liveness, speechiness).

    Args:
        playlist_id: Playlist identifier, passed unchanged to get_all_tracks_from_playlist().
        env_file_path: Path to the .env file, passed to get_spotify_object() and
            get_all_tracks_from_playlist().

    Returns:
        A list of 12-element tuples; empty when the playlist has no items. An item whose
        audio-features entry is None yields a tuple of twelve None values.
    """
    FEATURE_KEYS = (
        'key',
        'mode',
        'tempo',
        'time_signature',
        'valence',
        'danceability',
        'energy',
        'loudness',
        'acousticness',
        'instrumentalness',
        'liveness',
        'speechiness',
    )
    MAX_URIS_PER_REQUEST = 100              # spot.audio_features() accepts max 100 URIs per call

    spot = get_spotify_object(env_file_path)
    tracks = get_all_tracks_from_playlist(playlist_id, env_file_path)
    uri_list = [t['track']['uri'] for t in tracks]

    # Stepping through the list in slices never issues a request with an empty URI list,
    # even when the number of tracks is an exact multiple of 100 (or zero).
    tracks_audio_features_dicts = []
    for offset in range(0, len(uri_list), MAX_URIS_PER_REQUEST):
        batch = uri_list[offset:offset + MAX_URIS_PER_REQUEST]
        tracks_audio_features_dicts.extend(spot.audio_features(batch))

    # NOTE(review): the Spotify Web API reference documents a null entry for a track it has
    # no audio features for; assuming spot.audio_features() passes that through as None - confirm.
    # A None-filled tuple keeps the result aligned position by position with the playlist.
    missing = (None,) * len(FEATURE_KEYS)
    return [
        missing if features is None else tuple(features[key] for key in FEATURE_KEYS)
        for features in tracks_audio_features_dicts
    ]

In [59]:
# Test get_playlist_tracks_audio_features()
display(get_playlist_tracks_audio_features(THE_BEATLES_PLAYLIST_URL_1, 'env/.env'))

### *get_playlist_tracks_df()*

In [70]:
def get_playlist_tracks_df(playlist_id: str, env_file_path: str) -> pd.DataFrame:
    """Build a dataframe with one row per playlist item: track data followed by audio features.

    Args:
        playlist_id: Playlist identifier, forwarded to get_playlist_tracks_data() and
            get_playlist_tracks_audio_features().
        env_file_path: Path to the .env file, forwarded to the same two functions.

    Returns:
        A DataFrame whose first five columns come from get_playlist_tracks_data() and whose
        remaining twelve come from get_playlist_tracks_audio_features(). Rows are paired by
        position with zip(), so the result is as long as the shorter of the two lists.
    """
    data_columns = ['URI', 'Title', 'Album', 'Popularity', 'Duration']
    audio_feature_columns = [
        'Key', 'Mode', 'Tempo', 'Time_signature',
        'Valence', 'Danceability', 'Energy', 'Loudness',
        'Acousticness', 'Instrumentalness', 'Liveness', 'Speechiness',
    ]

    data_rows = get_playlist_tracks_data(playlist_id, env_file_path)
    feature_rows = get_playlist_tracks_audio_features(playlist_id, env_file_path)

    combined_rows = []
    for data, features in zip(data_rows, feature_rows):
        combined_rows.append(data + features)

    return pd.DataFrame(combined_rows, columns=data_columns + audio_feature_columns)


In [71]:
# Test get_playlist_tracks_df()
# tracks_df is kept in a variable because the final cell saves it to a CSV file.
tracks_df = get_playlist_tracks_df(THE_BEATLES_PLAYLIST_URL_1, 'env/.env')
display(tracks_df)

Unnamed: 0,URI,Title,Album,Popularity,Duration,Key,Mode,Tempo,Time_signature,Valence,Danceability,Energy,Loudness,Acousticness,Instrumentalness,Liveness,Speechiness
0,spotify:track:3KiexfmhxHvG5IgAElmTkd,I Saw Her Standing There,Please Please Me,69,174,4,1,160.109,4,0.971,0.491,0.801,-9.835,0.270000,0.000000,0.0665,0.0361
1,spotify:track:40qXGg5nRbcWzcFb26KWkQ,Misery,Please Please Me,53,109,0,1,133.348,4,0.882,0.591,0.605,-10.925,0.707000,0.000000,0.3090,0.0413
2,spotify:track:2baEFuU0gQon0hgVRioI1o,Anna (Go To Him),Please Please Me,56,177,2,1,109.286,4,0.835,0.608,0.565,-11.060,0.635000,0.000000,0.0601,0.0336
3,spotify:track:3JQWLa88R35d971o5bIImd,Chains,Please Please Me,49,145,10,1,129.884,4,0.929,0.654,0.561,-10.551,0.608000,0.000000,0.1290,0.0304
4,spotify:track:5ZBeML7Lf3FMEVviTyvi8l,Twist And Shout,Please Please Me,77,155,2,1,124.631,4,0.937,0.482,0.849,-9.198,0.641000,0.000008,0.0414,0.0452
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210,spotify:track:48TNLcToLs8DWkdj5vYdiW,Rain,Past Masters (Vols. 1 & 2 / Remastered),50,181,0,1,106.933,4,0.541,0.234,0.693,-8.277,0.000028,0.899000,0.4420,0.0408
211,spotify:track:1tM9TBNPjieSOZ2d5VAQ1y,Lady Madonna,Past Masters (Vols. 1 & 2 / Remastered),37,138,2,1,109.110,4,0.780,0.631,0.748,-7.604,0.385000,0.308000,0.2050,0.0300
212,spotify:track:379hxtlY5LvbPQa5LL6dPo,The Inner Light,Past Masters (Vols. 1 & 2 / Remastered),39,156,8,1,101.545,3,0.387,0.304,0.543,-10.619,0.751000,0.014500,0.0764,0.0340
213,spotify:track:5xf0zdP4KCfshyVn02D3Ea,Kansas City / Hey-Hey-Hey-Hey - Medley,Beatles For Sale,45,158,7,1,131.360,4,0.924,0.588,0.724,-6.630,0.614000,0.000089,0.8770,0.0335


## Save the resulting dataframe

In [69]:
from pathlib import Path

# Target folder: "data", a sibling of the current working directory; created if it is missing.
data_dir = Path.cwd().parent / 'data'
data_dir.mkdir(exist_ok=True, parents=True)
# index=False keeps the dataframe's row index out of the CSV file.
tracks_df.to_csv(data_dir.joinpath('The Beatles songs dataset, Spotify 1.csv'), index=False)