In [2]:
import pandas as pd
import spotipy
import os
import requests
import re
from datetime import datetime
from dotenv import load_dotenv
from spotipy.oauth2 import SpotifyClientCredentials
from bs4 import BeautifulSoup

# Load the .env file first so the lookups below can see its variables.
load_dotenv()

# Spotify API credentials read from the environment; each is None when the
# corresponding variable is not set.
SPOTIFY_CID = os.getenv("SPOTIFY_CID")
SPOTIFY_SECRET = os.getenv("SPOTIFY_SECRET")

In [3]:
def spotify_auth_obj():
    """Return a ``spotipy.Spotify`` client using client-credentials authentication.

    The client id and secret come from the module-level ``SPOTIFY_CID`` and
    ``SPOTIFY_SECRET`` values.
    """
    return spotipy.Spotify(
        client_credentials_manager=SpotifyClientCredentials(
            client_id=SPOTIFY_CID,
            client_secret=SPOTIFY_SECRET,
        )
    )

In [4]:
def get_albums(artist, spotify_obj):
    """Search for an artist by name and return that artist's album listing.

    Parameters
    ----------
    artist : str
        Free-text artist name used as the search query.
    spotify_obj : object
        Client exposing ``search(q=..., type=...)`` and
        ``artist_albums(artist_id=..., limit=..., album_type=...)``.

    Returns
    -------
    Whatever ``spotify_obj.artist_albums`` returns for the first search hit.
    Only one request with ``limit=50`` is made; no further pages are fetched.

    Raises
    ------
    IndexError
        If the search yields no artists.
    """
    matches = spotify_obj.search(q=artist, type='artist')['artists']['items']
    if not matches:
        # Same exception type an empty result produced before, but with a
        # message that says which query failed.
        raise IndexError("no Spotify artist found for {!r}".format(artist))

    # The first search hit is taken to be the requested artist.
    artist_id = matches[0]['id']
    return spotify_obj.artist_albums(artist_id=artist_id, limit=50, album_type='album')

In [5]:
def discography_df(albumns, spotify, end=None, patterns=None, ignore=None):
    """Build a one-row-per-track popularity table for a set of albums.

    Parameters
    ----------
    albumns : dict
        Album listing with an ``'items'`` list; each item is read for its
        ``'name'``, ``'release_date'`` and ``'id'`` keys. (The parameter
        spelling is kept so existing callers keep working.)
    spotify : object
        Client exposing ``album_tracks(album_id)`` and ``track(track_id)``.
    end : str or int, optional
        Last release year to keep; albums with a later year are skipped.
        Defaults to ``'2021'``.
    patterns : iterable of str, optional
        Regular expressions forwarded to ``clear_title`` for both album and
        track names.
    ignore : str, optional
        Albums whose name contains this substring are skipped. ``None``
        (the default) disables this filter.

    Returns
    -------
    pandas.DataFrame
        Columns ``album``, ``track`` and ``popularity``; albums are visited in
        the reverse of their order in ``albumns['items']``.
    """
    track_list = []
    album_list = []
    pop_list = []

    # Years are compared as strings (the first four characters of the release
    # date), so the cut-off is normalised to a string as well.
    date = str(end) if end is not None else '2021'

    for album in albumns['items'][::-1]:
        # Substring filter only applies when the caller supplied one.
        if ignore is not None and ignore in album['name']:
            continue
        if album['release_date'][:4] > date:
            continue

        # The cleaned album title is the same for every track of the album.
        album_title = clear_title(album['name'], patterns)

        tracks = spotify.album_tracks(album['id'])
        for track in tracks['items']:
            album_list.append(album_title)
            track_list.append(clear_title(track['name'], patterns))
            # Popularity is read from a separate per-track lookup.
            pop_list.append(spotify.track(track['id'])['popularity'])

    return pd.DataFrame({
        'album': album_list,
        'track': track_list,
        'popularity': pop_list})

In [197]:
def clear_title(title, patterns):
    """Strip every regex in ``patterns`` from ``title``.

    Parameters
    ----------
    title : str
        Text to clean.
    patterns : iterable of str or None
        Regular expressions removed from ``title`` one after another, in
        order. ``None`` means "no cleaning".

    Returns
    -------
    str
        ``title`` unchanged when ``patterns`` is ``None``; otherwise the title
        with all matches removed and trailing whitespace stripped.
    """
    # Identity check: containers with custom or element-wise equality must not
    # be mistaken for "no patterns".
    if patterns is None:
        return title

    for pattern in patterns:
        title = re.sub(pattern, '', title)
    return title.rstrip()

In [159]:
# Spotify client shared by the album lookup and discography cells below.
sp = spotify_auth_obj()

In [199]:
# Album listing for the first artist matching the query "Beatles".
albums = get_albums(artist="Beatles", spotify_obj=sp)

In [208]:
# Title clean-up regexes, applied in this order by clear_title.
# Raw strings keep the backslashes literal without invalid-escape warnings.
patterns = [
    r'\(.+\)',     # parenthesised text (greedy: first '(' through last ')')
    r'Remastered',
    r'-+',         # runs of hyphens; never matches the empty string
    r'2009',
]
# NOTE(review): the hyphen pattern removes every '-' in a title, including
# hyphens inside words — confirm that is intended.

discography = discography_df(albums, sp, end='1971', patterns=patterns, ignore='Deluxe')

In [209]:
# Summed track popularity per album, indexed by album title. One group-by is
# enough: reindexing the result onto its own index would change nothing.
tmp_series = discography.groupby(by='album')['popularity'].sum()

In [210]:
# Look up each row's album total in tmp_series and store it alongside the track.
album_total_per_row = discography['album'].map(tmp_series)
discography['album_popularity'] = album_total_per_row

In [211]:
# Number of rows (tracks) per album title, broadcast back onto every row.
tracks_per_album = discography['album'].value_counts()
discography['album_tracks'] = discography['album'].map(tracks_per_album)

In [212]:
# Mean track popularity per album: album total divided by track count, 2 decimals.
popularity_per_track = discography['album_popularity'] / discography['album_tracks']
discography['avg_album_popularity'] = popularity_per_track.round(2)

In [213]:
# Display the per-track table with the album-level columns added above.
discography

Unnamed: 0,album,track,popularity,album_popularity,album_tracks,avg_album_popularity
0,Please Please Me,I Saw Her Standing There,70,819,14,58.50
1,Please Please Me,Misery,53,819,14,58.50
2,Please Please Me,Anna,61,819,14,58.50
3,Please Please Me,Chains,50,819,14,58.50
4,Please Please Me,Boys,51,819,14,58.50
...,...,...,...,...,...,...
188,Let It Be,I've Got A Feeling,64,776,12,64.67
189,Let It Be,One After 909,60,776,12,64.67
190,Let It Be,The Long And Winding Road,67,776,12,64.67
191,Let It Be,For You Blue,59,776,12,64.67


In [214]:
# One row per album, ranked by average track popularity (highest first).
(
    discography[['album', 'avg_album_popularity']]
    .drop_duplicates()
    .sort_values(by='avg_album_popularity', ascending=False)
)

Unnamed: 0,album,avg_album_popularity
181,Let It Be,64.67
164,Abbey Road,63.41
69,Rubber Soul,61.79
110,Magical Mystery Tour,61.27
83,Revolver,61.21
97,Sgt. Pepper's Lonely Hearts Club Band,61.0
55,Help!,58.79
0,Please Please Me,58.5
121,The Beatles,58.03
28,A Hard Day's Night,57.54


In [215]:
# One row per album, ranked by summed track popularity (highest first).
(
    discography[['album', 'album_popularity']]
    .drop_duplicates()
    .sort_values(by='album_popularity', ascending=False)
)

Unnamed: 0,album,album_popularity
121,The Beatles,1741
164,Abbey Road,1078
69,Rubber Soul,865
83,Revolver,857
55,Help!,823
0,Please Please Me,819
97,Sgt. Pepper's Lonely Hearts Club Band,793
14,With The Beatles,777
181,Let It Be,776
28,A Hard Day's Night,748


In [217]:
# Persist the full table to CSV, leaving the row index out of the file.
discography.to_csv(path_or_buf='beatles_set.csv', index=False)

In [1]:
# NOTE(review): the recorded execution count (In [1]) and the NameError below
# suggest this cell was run in a fresh kernel before `discography` was built —
# re-run the notebook top to bottom to confirm.
discography

NameError: name 'discography' is not defined