In [22]:
from datetime import *
from dateutil.relativedelta import *
from IPython.core.display import clear_output
import requests, json, time, requests_cache, re, gspread
import pandas as pd
from tqdm import tqdm
from gspread_dataframe import get_as_dataframe

## Initialize Libraries
## tqdm.pandas() is what makes DataFrame.progress_apply (used further down) available.
tqdm.pandas()
## Install the requests cache; the lookup helpers below check `from_cache`
## on each response to decide whether a rate-limiting pause is needed.
requests_cache.install_cache()

## Get Google Sheet
## Set up Google API credentials for gspread first: https://docs.gspread.org/en/latest/oauth2.html
gc = gspread.service_account()
## The spreadsheet must be named 'Albums'; append_dataframe picks a worksheet inside it.
sheet = gc.open('Albums')
## Columns read back from the worksheet when counting the rows already present.
fields = ['Artist', 'Album Name', 'Genre', 'Year', 'Date Listened']

## Set constant variables
## Get a Last.fm API key: https://www.last.fm/api/account/create (only Application Name needed)
## Leave USER_AGENT as it is; it is sent as the 'user-agent' header of every request.
API_KEY= "XXX"
## NOTE(review): API_SECRET is not referenced by any code visible in this notebook.
API_SECRET= "XXX"
USER_AGENT = 'Dataquest'
## Reference time, captured once when this cell runs; get_responses subtracts
## the requested look-back period from it.
d = datetime.today()
## Module-level accumulators filled by get_uniquealbums / get_responses.
albums=[]
responses = []

## Set Pandas options
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_colwidth', None)


def lastfm_get(payload, timeout=30):
    """Send a GET request to the Last.fm web-service endpoint.

    Args:
        payload: Mapping of Last.fm query parameters (for example ``method``
            and ``user``). It is copied before the API key and format are
            added, so the caller's dict is left untouched.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error, so a stalled connection cannot hang the
            notebook indefinitely.

    Returns:
        The ``requests`` response object. The status code is not checked
        here; that is left to the caller.
    """
    headers = {'user-agent': USER_AGENT}
    url = 'https://ws.audioscrobbler.com/2.0/'

    # Work on a copy: the credentials must not leak back into the dict the
    # caller passed in (it may be reused, logged or printed).
    params = dict(payload)
    params['api_key'] = API_KEY
    params['format'] = 'json'

    return requests.get(url, headers=headers, params=params, timeout=timeout)

def jprint(obj):
    """Pretty-print a JSON-serialisable object with sorted keys and 4-space indent."""
    print(json.dumps(obj, indent=4, sort_keys=True))

def lookup_albumtags(album, artist):
    """Return the album's top Last.fm tags as a comma-separated string.

    The first three tags of the ``album.getTopTags`` response are taken and
    any of them that is purely a number or a decade (``"1987"``, ``"80s"``)
    is dropped, so the result may contain fewer than three tags.

    Args:
        album: Album title to look up.
        artist: Artist name the album belongs to.

    Returns:
        The remaining tag names joined with ``", "`` (an empty string when
        there are none), or ``None`` when the request did not return
        HTTP 200 or the body carries no ``toptags`` entry.
    """
    response = lastfm_get({
        'method': 'album.getTopTags',
        'album':  album,
        'artist': artist
    })

    # if there's an error, just return nothing
    if response.status_code != 200:
        return None

    # rate limiting: only pause when the request actually hit the network
    if not getattr(response, 'from_cache', False):
        time.sleep(0.25)

    toptags = response.json().get('toptags')
    if not toptags:
        # A 200 response without tag data is treated like a failed lookup.
        return None

    tag_items = toptags.get('tag', [])
    # NOTE(review): guards against 'tag' arriving as one object instead of a
    # list when there is a single tag -- confirm against real responses.
    if isinstance(tag_items, dict):
        tag_items = [tag_items]

    top_names = [item['name'] for item in tag_items[:3]]

    # Filter whole tags rather than regex-editing the joined string: that
    # also removes a year/decade tag in last position and never cuts digits
    # out of the middle of a real tag name.
    return ', '.join(
        name for name in top_names if not re.fullmatch(r'\d+s?', name)
    )

def lookup_album(album, artist):
    """Fetch the ``album.getInfo`` record for an album from Last.fm.

    Returns the response object when the request came back with HTTP 200,
    otherwise ``None``. After a successful request that was not served from
    the cache, the function pauses briefly as rate limiting.
    """
    query = {
        'method': 'album.getInfo',
        'album':  album,
        'artist': artist,
        'extended': 1,
    }
    result = lastfm_get(query)

    if result.status_code == 200:
        served_from_cache = getattr(result, 'from_cache', False)
        if not served_from_cache:
            # rate limiting
            time.sleep(0.25)
        return result

    # any other status is treated as "no data"
    return None
    
def lookup_trackcount(album, artist):
    """Return how many tracks Last.fm lists for an album.

    Args:
        album: Album title to look up.
        artist: Artist name the album belongs to.

    Returns:
        The number of entries under ``album.tracks.track`` in the
        ``album.getInfo`` response, or ``0`` when the lookup failed or the
        response carries no track listing.
    """
    response = lookup_album(album, artist)
    if response is None:
        return 0

    # No extra pause here: lookup_album already rate-limits the one HTTP
    # request that was made.
    album_info = response.json().get('album', {})
    track_section = album_info.get('tracks')
    if not isinstance(track_section, dict):
        return 0

    tracks = track_section.get('track', [])
    # NOTE(review): guards against 'track' arriving as one object instead of
    # a list for single-track albums; len() of that dict would count its
    # keys, not tracks -- confirm against real responses.
    if isinstance(tracks, dict):
        return 1
    return len(tracks)

def append_dataframe(username, df1):
    """Append the rows of ``df1`` to the worksheet mapped to a Last.fm user.

    Args:
        username: Last.fm user name; must be one of the names mapped to a
            worksheet below.
        df1: DataFrame whose rows are appended (values only, no header).

    Raises:
        ValueError: If ``username`` has no worksheet mapped to it.
    """
    worksheet_names = {
        "j21w91": "Willi",
        "mabbott99": "Abbott",
    }
    if username not in worksheet_names:
        # Fail with a clear message before touching the spreadsheet instead
        # of crashing later on an unbound local variable.
        raise ValueError(
            "No worksheet configured for user {!r}".format(username)
        )

    worksheet = sheet.worksheet(worksheet_names[username])
    print("Appending Data to " + str(worksheet) + " on " + str(sheet))

    # Count the rows already present to find where the new ones go.
    existing = get_as_dataframe(worksheet, usecols=fields, skiprows=6)
    existing = existing.dropna(how="all")

    # presumably: 6 skipped rows + 1 header row, so data starts on sheet
    # row 8 -- verify against the sheet layout.
    next_row = len(existing.index) + 8
    worksheet.append_rows(df1.values.tolist(), table_range='B' + str(next_row))

def lookup_albumyear(album, artist):
    """Return the year of the album's Last.fm wiki ``published`` date.

    Args:
        album: Album title to look up.
        artist: Artist name the album belongs to.

    Returns:
        The four-digit year as a string, or ``"N/A"`` when the lookup
        failed, the album has no wiki entry, or the ``published`` value is
        missing or not in the expected ``'%d %b %Y, %H:%M'`` format.
    """
    response = lookup_album(album, artist)
    if response is None:
        # A failed lookup must still produce a value for the caller.
        return "N/A"

    # No extra pause here: lookup_album already rate-limits the one HTTP
    # request that was made.
    wiki = response.json().get('album', {}).get('wiki')
    if not wiki:
        return "N/A"

    try:
        published = datetime.strptime(wiki['published'], '%d %b %Y, %H:%M')
    except (KeyError, TypeError, ValueError):
        # A missing or unparseable date should not abort the whole run.
        return "N/A"
    return published.strftime("%Y")

def get_responses(username, date, date_amount):
    """Download a user's recent tracks from Last.fm, following pagination.

    Args:
        username: Last.fm user name.
        date: Unit of the look-back period: ``"day"``, ``"week"`` or
            ``"month"``.
        date_amount: How many of those units to go back from the
            module-level reference time ``d``.

    Returns:
        A list with the track entries of every page fetched by this call.
        It is empty when ``date`` is not a known unit, and holds only the
        pages fetched so far when a request fails (the error body is
        printed). The module-level ``responses`` list is updated to hold
        the same entries.
    """
    unit_keywords = {'day': 'days', 'week': 'weeks', 'month': 'months'}
    if date not in unit_keywords:
        print("Date unknown.")
        return []

    # The cut-off does not change between pages, so compute it once.
    look_back = relativedelta(**{unit_keywords[date]: date_amount})
    since = int((d - look_back).timestamp())

    # Collect into a fresh list so tracks from an earlier call (possibly for
    # another user) never leak into this result.
    tracks = []
    page = 1
    total_pages = 1
    while page <= total_pages:
        print("Requesting page {}/{}".format(page, total_pages))
        clear_output(wait=True)

        response = lastfm_get({
            'method': 'user.getRecentTracks',
            'limit': 1000,
            'from': since,
            'user': username,
            'page': page,
        })

        # Check the status before touching the body: an error response has
        # no 'recenttracks' key to read.
        if response.status_code != 200:
            print(response.text)
            break

        recent = response.json()['recenttracks']
        page_tracks = recent['track']
        # NOTE(review): guards against 'track' arriving as one object
        # instead of a list when a page holds a single track -- confirm
        # against real responses.
        if isinstance(page_tracks, dict):
            page_tracks = [page_tracks]
        tracks.extend(page_tracks)

        page_info = recent['@attr']
        total_pages = int(page_info['totalPages'])
        page = int(page_info['page']) + 1

        # rate limiting: only pause when the request actually hit the network
        if not getattr(response, 'from_cache', False):
            time.sleep(0.25)

    # Keep the module-level list in step for interactive inspection.
    responses[:] = tracks
    return tracks

def filter_albums(df):
    """Keep fully listened albums and enrich them with genre and year.

    An album row is kept when ``Listened_Count >= Track_Count`` and
    ``Track_Count > 3``; one row per ``'Album Name'`` survives. Genre and
    year are then looked up per album.

    Args:
        df: DataFrame with at least the columns ``'Artist'``,
            ``'Album Name'``, ``'Date Listened'``, ``'Track'``,
            ``'Listened_Count'`` and ``'Track_Count'``.

    Returns:
        A DataFrame with the columns ``'Album Name'``, ``'Artist'``,
        ``'Genre'``, ``'Year'``, ``'Date Listened'``, in reversed row order
        and with a fresh index. It has no rows when no album qualifies.
    """
    final_columns = ['Album Name', 'Artist', 'Genre', 'Year', 'Date Listened']

    fully_listened = df['Listened_Count'] >= df['Track_Count']
    long_enough = df['Track_Count'] > 3
    df = df[fully_listened & long_enough]
    df = df.drop(['Track'], axis=1).drop_duplicates('Album Name')

    if df.empty:
        # A row-wise apply on an empty frame does not yield a single column
        # that could be assigned to 'Genre'/'Year', so return the empty
        # result in its final shape without any lookups.
        return df.reindex(final_columns, axis=1).reset_index(drop=True)

    print("Getting metadata of albums listened to:")
    df['Genre'] = df.progress_apply(
        lambda x: lookup_albumtags(x['Album Name'], x['Artist']), axis=1)
    df['Year'] = df.progress_apply(
        lambda x: lookup_albumyear(x['Album Name'], x['Artist']), axis=1)

    df = df.drop(['Listened_Count', 'Track_Count'], axis=1)
    df = df.reindex(final_columns, axis=1)
    # Reverse the row order -- presumably to list the oldest listen first,
    # assuming the input is newest-first; verify against the caller.
    df = df.iloc[::-1]
    df.reset_index(drop=True, inplace=True)
    return df

def get_uniquealbums(username, date, date_amount):
    """Find the albums a user listened to in full and append them to the sheet.

    Recent tracks are fetched, grouped by album name, compared against each
    album's track count, filtered and enriched by ``filter_albums``, and the
    result is appended to the user's worksheet.

    Args:
        username: Last.fm user name (must have a worksheet mapped to it).
        date: Unit of the look-back period: ``"day"``, ``"week"`` or
            ``"month"``.
        date_amount: How many of those units to look back.

    Returns:
        The DataFrame of qualifying albums with the columns
        ``'Album Name'``, ``'Artist'``, ``'Genre'``, ``'Year'``,
        ``'Date Listened'``. It has no rows, and nothing is appended, when
        no tracks were found or no album qualified.
    """
    tracklist = get_responses(username, date, date_amount)

    # Build the rows in a fresh list so tracks from an earlier call
    # (possibly for another user) never leak into this run.
    rows = []
    for track in tracklist:
        if 'date' in track:
            listened = datetime.fromtimestamp(int(track['date']['uts']))
        else:
            # presumably the "now playing" entry, which has no scrobble
            # time; use the current date rather than a stale value from the
            # previous iteration or the `date` argument.
            listened = datetime.today()
        rows.append([
            track['artist']['#text'],
            track['album']['#text'],
            listened.strftime('%d/%m/%Y'),
            track['name'],
        ])
    # Keep the module-level list in step for interactive inspection.
    albums[:] = rows

    # Passing the column names up front also works for an empty row list.
    df = pd.DataFrame(
        rows, columns=['Artist', 'Album Name', 'Date Listened', 'Track'])
    df.dropna(inplace=True)
    # De-duplicate per album: equally named tracks on different albums
    # (e.g. "Intro") must each count towards their own album.
    df = df.drop_duplicates(['Album Name', 'Track'])

    if df.empty:
        print("No tracks found for the requested period.")
        return pd.DataFrame(
            columns=['Album Name', 'Artist', 'Genre', 'Year', 'Date Listened'])

    df['Listened_Count'] = df.groupby(['Album Name'])['Track'].transform('nunique')
    print("Getting recent tracks info:")
    df['Track_Count'] = df.progress_apply(
        lambda x: lookup_trackcount(x['Album Name'], x['Artist']), axis=1)

    df = filter_albums(df)
    if df.empty:
        print("No fully listened albums to append.")
        return df

    append_dataframe(username, df)
    return df


In [None]:
## Run this cell to carry out the process
## get_uniquealbums(lastfm_username, date_length, date_amount)
## lastfm_username = "j21w91" or "mabbott99" (the only names mapped to a worksheet)
## date_length = "day", "week" or "month"
## date_amount = how many of those units to look back
get_uniquealbums("j21w91", "month", 2)
