In [1]:
import spotipy
import requests
from IPython.display import HTML
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
pd.set_option('display.max_colwidth', None)
import numpy as np

# Setting up API

In [2]:
import os

# Marker variable: the script below hides any input cell whose first
# variable token is literally `hide_me` (it also hides the first input cell).
hide_me = ''

# Build an HTML snippet that defines `code_toggle()`: on each call it either
# hides the marked input cells and makes output prompts transparent, or shows
# every input cell again, then flips the `code_show` flag. The function runs
# once when the document is ready and again whenever the (invisible,
# opacity:0) form button is submitted. The script relies on jQuery (`$`).
# NOTE(review): this expression is not the last statement of the cell and is
# not passed to display(); Jupyter normally renders only a cell's final
# expression, so confirm whether the toggle is meant to be active.
HTML('''<script>
code_show=true; 
function code_toggle() {
  if (code_show) {
    $('div.input').each(function(id) {
      el = $(this).find('.cm-variable:first');
      if (id == 0 || el.text() == 'hide_me') {
        $(this).hide();
      }
    });
    $('div.output_prompt').css('opacity', 0);
  } else {
    $('div.input').each(function(id) {
      $(this).show();
    });
    $('div.output_prompt').css('opacity', 1);
  }
  code_show = !code_show
} 
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input style="opacity:0" type="submit" value="Click here to toggle on/off the raw code."></form>''')

# Spotify API credentials. They are read from the environment so that secrets
# never have to be typed into (and saved with) the notebook. When a variable
# is unset the value falls back to the empty-string placeholder.
CLIENT_ID = os.environ.get('SPOTIFY_CLIENT_ID', '')
CLIENT_SECRET = os.environ.get('SPOTIFY_CLIENT_SECRET', '')

In [3]:
AUTH_URL = 'https://accounts.spotify.com/api/token'

# Ask the accounts service for an access token using the client-credentials
# grant. The fields are sent as the form-encoded body of the POST.
auth_response = requests.post(
    AUTH_URL,
    data={
        'grant_type': 'client_credentials',
        'client_id': CLIENT_ID,
        'client_secret': CLIENT_SECRET,
    },
    timeout=30,  # do not hang the kernel indefinitely on a stalled connection
)

# Fail here with the HTTP status (e.g. missing or wrong credentials) instead
# of an opaque KeyError on 'access_token' below.
auth_response.raise_for_status()

# convert the response to JSON
auth_response_data = auth_response.json()

# save the access token
access_token = auth_response_data['access_token']

In [4]:
# Authorization header sent with every API request below.
headers = {'Authorization': f'Bearer {access_token}'}

# Beatles artist ID (from the Spotify artist URL)

In [5]:
# Spotify artist ID used to build the artist endpoint URL below.
artistid = "3WrFJ7ztbogyGnTHbHJFl2"

In [6]:
# Common prefix of every Web API endpoint requested in this notebook.
BASE_URL = "https://api.spotify.com/v1/"

# Getting the albums associated with the artist

In [7]:
# Fetch the artist's releases of group 'album' for the US market.
# Only a single page of at most 50 albums is requested; no further pages
# are followed.
r = requests.get(
    BASE_URL + 'artists/' + artistid + '/albums',
    headers=headers,
    params={'include_groups': 'album', 'limit': 50, 'country': 'US'},
    timeout=30,  # do not hang the kernel indefinitely on a stalled connection
)

# Surface an expired token or a bad request as an HTTP error here rather than
# as a KeyError on 'items' in the cells that consume `d`.
r.raise_for_status()
d = r.json()

In [8]:
# List every returned album together with its release date, one per line.
for album in d['items']:
    print(f"{album['name']} , {album['release_date']}")

Get Back (Rooftop Performance) , 2022-01-28
Let It Be (Super Deluxe) , 2021-10-15
Abbey Road (Super Deluxe Edition) , 2019-09-27
The Beatles , 2018-11-09
Live At The Hollywood Bowl , 2016-09-09
Let It Be... Naked (Remastered) , 2014-01-01
Yellow Submarine Songtrack , 2014-01-01
On Air - Live At The BBC (Vol.2) , 2013-11-11
1 (Remastered) , 2000-11-13
Live At The BBC (Remastered) , 1994-11-30
Let It Be (Remastered) , 1970-05-08
Abbey Road (Remastered) , 1969-09-26
Yellow Submarine (Remastered) , 1969-01-17
The Beatles (Remastered) , 1968-11-22
Magical Mystery Tour (Remastered) , 1967-11-27
Sgt. Pepper's Lonely Hearts Club Band (Deluxe Edition) , 1967-06-01
Sgt. Pepper's Lonely Hearts Club Band (Remastered) , 1967-06-01
Sgt. Pepper's Lonely Hearts Club Band (Super Deluxe Edition) , 1967-05-26
Revolver (Remastered) , 1966-08-05
Rubber Soul (Remastered) , 1965-12-03
Help! (Remastered) , 1965-08-06
Beatles For Sale (Remastered) , 1964-12-04
A Hard Day's Night (Remastered) , 1964-07-10
With 

# Getting Tracks in each album

In [9]:
# `data` collects one record per track; `albums` remembers the (upper-cased)
# album titles already processed so that duplicate editions can be skipped.
data, albums = [], []

In [10]:
# Sanity check: the record list is still empty before the fetch loop runs.
data

[]

In [11]:
# loop over albums and get all tracks
for album in d['items']:
    album_name = album['name']

    # here's a hacky way to skip over albums we've already grabbed
    trim_name = album_name.split('(')[0].strip()
    if trim_name.upper() in albums or int(album['release_date'][:4]) > 1971:
        continue
    albums.append(trim_name.upper()) # use upper() to standardize
    
    print(album_name)
    
    # pull all tracks from this album
    r = requests.get(BASE_URL + 'albums/' + album['id'] + '/tracks', 
        headers=headers)
    tracks = r.json()['items']
    
    for track in tracks:
        # get audio features (key, liveness, danceability, ...)
        f = requests.get(BASE_URL + 'audio-features/' + track['id'], 
            headers=headers)
        f = f.json()
        
        # combine with album info
        f.update({
            'track_name': track['name'],
            'track_number':track["track_number"],
            'short_album_name': trim_name,
            'release_date': album['release_date'],
            'album_cover': album['images'][0]['url']
        })
        
        data.append(f)

Let It Be (Remastered)
Abbey Road (Remastered)
Yellow Submarine (Remastered)
The Beatles (Remastered)
Magical Mystery Tour (Remastered)
Sgt. Pepper's Lonely Hearts Club Band (Deluxe Edition)
Revolver (Remastered)
Rubber Soul (Remastered)
Help! (Remastered)
Beatles For Sale (Remastered)
A Hard Day's Night (Remastered)
With The Beatles (Remastered)
Please Please Me (Remastered)


# Converting to a Data Frame

In [12]:
# One row per collected track record; the dictionary keys become the columns.
df = pd.DataFrame.from_records(data)

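The `display.max_colwidth` option set in the first cell lets us see the full URL of the album image; the next cell formats floats with two decimals.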

In [13]:
# Show floats with thousands separators and two decimals in rendered frames.
pd.set_option('display.float_format', '{:,.2f}'.format)

In [14]:
# Parse release_date into real datetimes, then order the rows by it.
df = (
    df
    .assign(release_date=pd.to_datetime(df['release_date']))
    .sort_values(by='release_date')
)

In [17]:
# Track length in seconds, derived from the millisecond duration.
df['duration_seconds'] = df['duration_ms'].mul(0.001)

# Clean the track name column so it holds only the title
Many track names carry a version suffix after a hyphen (e.g. "- Remastered").

In [18]:
# Keep only the title part of each track name. The version note is assumed to
# follow a spaced hyphen ("Title - Remastered ..."); splitting once on " - "
# instead of on every "-" keeps titles that themselves contain a hyphen
# intact, and strip() removes any leftover surrounding whitespace.
df["track_name"] = df['track_name'].str.split(' - ', n=1).str[0].str.strip()


# Drop columns not useful in analysis 

In [19]:
# Number of missing values in each column.
df.isna().sum()

danceability        0
energy              0
key                 0
loudness            0
mode                0
speechiness         0
acousticness        0
instrumentalness    0
liveness            0
valence             0
tempo               0
type                0
id                  0
uri                 0
track_href          0
analysis_url        0
duration_ms         0
time_signature      0
track_name          0
track_number        0
short_album_name    0
release_date        0
album_cover         0
duration_seconds    0
dtype: int64

In [20]:
# Columns that are not used in the analysis (duration_ms has already been
# converted into duration_seconds).
UNUSED_COLUMNS = ['type', 'uri', 'id', 'track_href', 'duration_ms', 'analysis_url']

# Drop them in one step. errors='ignore' keeps the cell re-runnable: dropping
# a column that is already gone no longer raises a KeyError.
df = df.drop(columns=UNUSED_COLUMNS, errors='ignore')

In [26]:
# Distinct album titles present in the frame, in order of first appearance.
pd.unique(df["short_album_name"])

array(['Please Please Me', 'With The Beatles', "A Hard Day's Night",
       'Beatles For Sale', 'Help!', 'Rubber Soul', 'Revolver',
       "Sgt. Pepper's Lonely Hearts Club Band", 'Magical Mystery Tour',
       'The Beatles', 'Yellow Submarine', 'Abbey Road', 'Let It Be'],
      dtype=object)

# Creating the first-track and last-track columns

In [27]:
# Flag rows whose track_number is 1 (1 = yes, 0 = no).
df['track1'] = df['track_number'].eq(1).astype(int)

In [28]:
# Notes from inspecting each album's rows one at a time, e.g.
#   df[df["short_album_name"] == "Help!"]
# The number recorded for each album (labelled "tracks" in the original notes
# for Help! and Abbey Road) is the one used as that album's last track number
# when the last_track flag is built:
#
#   Help!                                    14
#   Let It Be                                12
#   Rubber Soul                              14
#   Revolver                                 14
#   A Hard Day's Night                       13
#   Sgt. Pepper's Lonely Hearts Club Band    13
#   Magical Mystery Tour                     11
#   The Beatles                              17
#   With The Beatles                         14
#   Beatles For Sale                         14
#   Yellow Submarine                         13
#   Please Please Me                         14
#   Abbey Road                               17


In [29]:
# Track number treated as the closing track of each album.
last_track_number = {
    "Help!": 14,
    "Let It Be": 12,
    "Rubber Soul": 14,
    "Revolver": 14,
    "A Hard Day's Night": 13,
    "Sgt. Pepper's Lonely Hearts Club Band": 13,
    "Magical Mystery Tour": 11,
    "The Beatles": 17,
    "With The Beatles": 14,
    "Beatles For Sale": 14,
    "Yellow Submarine": 13,
    "Please Please Me": 14,
    "Abbey Road": 17,
}

# Look up the closing track number for every row's album. Albums missing from
# the mapping give NaN, which never compares equal, so they end up as 0.
closing_number = df['short_album_name'].map(last_track_number)

# 1 where the row is its album's closing track, otherwise 0.
df['last_track'] = (df['track_number'] == closing_number).astype(int)

In [32]:
# Display the prepared frame; the saved output shows only the first and last
# rows. NOTE(review): this cell's execution count (32) is out of sequence with
# its neighbours — confirm the notebook reproduces with Restart & Run All.
df

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_name,track_number,short_album_name,release_date,album_cover,duration_seconds,track1,last_track
189,0.48,0.85,2,-9.20,1,0.05,0.64,0.00,0.04,0.94,124.63,4,Twist And Shout,14,Please Please Me,1963-03-22,https://i.scdn.co/image/ab67616d0000b273dbeec63ad914c973e75c24df,155.23,0,1
176,0.49,0.80,4,-9.84,1,0.04,0.27,0.00,0.07,0.97,160.11,4,I Saw Her Standing There,1,Please Please Me,1963-03-22,https://i.scdn.co/image/ab67616d0000b273dbeec63ad914c973e75c24df,173.95,1,0
177,0.59,0.60,0,-10.93,1,0.04,0.71,0.00,0.31,0.88,133.35,4,Misery,2,Please Please Me,1963-03-22,https://i.scdn.co/image/ab67616d0000b273dbeec63ad914c973e75c24df,108.55,0,0
178,0.61,0.56,2,-11.06,1,0.03,0.64,0.00,0.06,0.83,109.29,4,Anna (Go To Him),3,Please Please Me,1963-03-22,https://i.scdn.co/image/ab67616d0000b273dbeec63ad914c973e75c24df,177.13,0,0
179,0.65,0.56,10,-10.55,1,0.03,0.61,0.00,0.13,0.93,129.88,4,Chains,4,Please Please Me,1963-03-22,https://i.scdn.co/image/ab67616d0000b273dbeec63ad914c973e75c24df,145.08,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8,0.55,0.83,4,-6.43,1,0.07,0.03,0.00,0.91,0.89,90.95,4,One After 909,9,Let It Be,1970-05-08,https://i.scdn.co/image/ab67616d0000b27384243a01af3c77b56fe01ab1,173.96,0,0
10,0.88,0.56,2,-10.77,1,0.09,0.24,0.05,0.24,0.95,128.54,4,For You Blue,11,Let It Be,1970-05-08,https://i.scdn.co/image/ab67616d0000b27384243a01af3c77b56fe01ab1,152.21,0,0
11,0.76,0.59,2,-9.84,1,0.06,0.49,0.01,0.61,0.33,123.09,4,Get Back,12,Let It Be,1970-05-08,https://i.scdn.co/image/ab67616d0000b27384243a01af3c77b56fe01ab1,189.39,0,1
6,0.52,0.52,2,-12.04,0,0.21,0.38,0.00,0.10,0.52,169.04,4,Maggie Mae,7,Let It Be,1970-05-08,https://i.scdn.co/image/ab67616d0000b27384243a01af3c77b56fe01ab1,40.04,0,0


# Export Data Frame

In [30]:
# Write the prepared frame to disk; the row index is written too (as the
# first column of the file).
df.to_csv("beatles.csv", index=True)