# Analyzing LastFM API


## Importing historic data from LastFM API:

In [1]:
import lastfm
lastfm.import_historic_data(file_path='lastfm_data', initial_year=2019,end_year=2021)

saved lastfm_data2019.csv
saved lastfm_data2020.csv
saved lastfm_data2021.csv


'files saved correctly at lastfm_data'

## Reading imported files:

In [1]:
import pandas 
import glob
import os

path = os.getcwd()
lasffm_files = glob.glob(path + '/*.csv')
df_files = [pandas.read_csv(file, index_col=0) for file in lasffm_files]
lastfm_df = pandas.concat(df_files)
# adding datepart information
lastfm_df = lastfm_df[lastfm_df.date_uts.notnull()] # dropping null dates
lastfm_df['complete_date'] = pandas.to_datetime(lastfm_df.date_uts,dayfirst=True,unit='s')
lastfm_df['simple_date'] = lastfm_df.complete_date.dt.date
lastfm_df['simple_time'] = lastfm_df.complete_date.dt.time
lastfm_df['year'] = lastfm_df.complete_date.dt.year.astype(int)
lastfm_df['month'] = lastfm_df.complete_date.dt.month.astype(int)

lastfm_df.head(10)

Unnamed: 0,artist_name,album_name,track_name,loved_track,date_uts,date,complete_date,simple_date,simple_time,year,month
1,Haken,Virus,Carousel,0,1609025000.0,2020-12-26 20:30:58,2020-12-26 23:30:58,2020-12-26,23:30:58,2020,12
2,Haken,Virus,Invasion,0,1609025000.0,2020-12-26 20:24:16,2020-12-26 23:24:16,2020-12-26,23:24:16,2020,12
3,Haken,Virus,Prosthetic,0,1609025000.0,2020-12-26 20:18:15,2020-12-26 23:18:15,2020-12-26,23:18:15,2020,12
4,Pain of Salvation,Remedy Lane Re:lived,Beyond the Pale (Live),0,1608750000.0,2020-12-23 15:55:11,2020-12-23 18:55:11,2020-12-23,18:55:11,2020,12
5,Pain of Salvation,Remedy Lane Re:lived,Second Love (Live),0,1608749000.0,2020-12-23 15:50:19,2020-12-23 18:50:19,2020-12-23,18:50:19,2020,12
6,Pain of Salvation,Remedy Lane Re:lived,Waking Every God (Live),0,1608749000.0,2020-12-23 15:44:38,2020-12-23 18:44:38,2020-12-23,18:44:38,2020,12
7,Pain of Salvation,Remedy Lane Re:lived,Dryad Of The Woods (Live),0,1608749000.0,2020-12-23 15:37:44,2020-12-23 18:37:44,2020-12-23,18:37:44,2020,12
8,Pain of Salvation,Remedy Lane Re:lived,Chain Sling (Live),0,1608748000.0,2020-12-23 15:33:07,2020-12-23 18:33:07,2020-12-23,18:33:07,2020,12
9,Pain of Salvation,Remedy Lane Re:lived,Rope Ends (Live),0,1608748000.0,2020-12-23 15:25:55,2020-12-23 18:25:55,2020-12-23,18:25:55,2020,12
10,Pain of Salvation,Remedy Lane Re:lived,Undertow (live),0,1608748000.0,2020-12-23 15:21:08,2020-12-23 18:21:08,2020-12-23,18:21:08,2020,12


In [2]:
years_list = lastfm_df.year.unique()
years_list.sort()
years_list


array([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
       2020, 2021])

## Given a date, top albuns from the past:

In [3]:
# import date, datetime
from datetime import datetime, date
import time

today = datetime.today()
today_day = today.day
today_month = today.month
today_year = today.year


df_list = []
for year in years_list:
    day_filter = date(year,today_month,today_day)
    df = lastfm_df[lastfm_df.simple_date == day_filter]
    df = df.groupby(['year','album_name','artist_name']).count().reset_index()
    df = df[['year','album_name','artist_name','date']]
    df.columns = ['year', 'album', 'artist', 'frequency']
    df = df.head(1)
    df_list.append(df)

in_the_past = pandas.concat(df_list)
in_the_past.head(20)

Unnamed: 0,year,album,artist,frequency
0,2009,A Saucerful of Secrets,Pink Floyd,7
0,2010,"1,039/Smoothed Out Slappy Hours",Green Day,2
0,2011,... Best II,The Smiths,2
0,2012,"1990-05-15 The Malting, Snape, UK",The Notting Hillbillies,25
0,2013,Empire,Queensrÿche,1
0,2014,Leftoverture,Kansas,1
0,2015,AC/DC Live,AC/DC,2
0,2016,Riding With the King,Eric Clapton,15
0,2019,Dream Theater,Dream Theater,1
0,2020,Alive In Athens (Live),Iced Earth,5


## Some loved tracks

In [4]:
loved_album = lastfm_df.groupby(['year','album_name','artist_name']).sum().reset_index()
loved_album = loved_album[['year','album_name','artist_name','loved_track']]
loved_album.columns = ['year', 'album', 'artist', 'loved_tracks']
played_album = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
played_album = played_album[['year','album_name','artist_name','date']]
played_album.columns = ['year', 'album', 'artist', 'times_played']
loved_album = loved_album.merge(played_album, how='inner', on=['year','album','artist'])
loved_album['artist_album'] = loved_album.artist + ' - ' + loved_album.album
loved_album = loved_album.sort_values(by=['year','loved_tracks'], ascending=False)
loved_album.head()

Unnamed: 0,year,album,artist,loved_tracks,times_played,artist_album
10553,2021,Unweaving the Rainbow,Frameshift,80,100,Frameshift - Unweaving the Rainbow
10465,2021,The Dark Side of the Moon,Pink Floyd,66,73,Pink Floyd - The Dark Side of the Moon
10398,2021,Seventh Son of a Seventh Son,Iron Maiden,50,80,Iron Maiden - Seventh Son of a Seventh Son
10508,2021,The Radio Waves Goodbye,John Macaluso & Union Radio,50,214,John Macaluso & Union Radio - The Radio Waves ...
10375,2021,Remedy Lane Re:lived,Pain of Salvation,39,508,Pain of Salvation - Remedy Lane Re:lived


### Loved & Appreciated Albuns

In [5]:
appreciated_df = loved_album[loved_album.loved_tracks>0]
appreciated_df = appreciated_df.sort_values(by=['year','times_played'], ascending=False)
list_of_appreciated = []
for year in years_list:
    df = appreciated_df[appreciated_df.year == year].head(3)
    df = df.artist_album.unique()
    for i in df:
        list_of_appreciated.append(i)
appreciated = loved_album[loved_album.artist_album.isin(list_of_appreciated)]
appreciated = appreciated.sort_values(by='times_played',ascending=False)
appreciated.pivot(index='artist_album',columns='year',values='times_played')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
artist_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AC/DC - Black Ice,,,,,230.0,32.0,,,17.0,24.0,,1.0,
AC/DC - Blow Up Your Video,,,,,131.0,198.0,,,11.0,,11.0,19.0,
Alan Parsons Project - Freudiana,,,,28.0,254.0,214.0,,,25.0,,,,
Blind Guardian - Live,218.0,94.0,41.0,20.0,,105.0,59.0,110.0,130.0,112.0,213.0,34.0,124.0
Buffalo Springfield - Buffalo Springfield,,,,61.0,1.0,12.0,12.0,89.0,11.0,,,28.0,
Dream Theater - Falling Into Infinity,282.0,31.0,151.0,43.0,,,,,4.0,11.0,86.0,69.0,21.0
Genesis - Turn It on Again: The Hits,,,308.0,90.0,,,7.0,,24.0,,,,
Haken - Virus,,,,,,,,,,,,405.0,180.0
John Macaluso & Union Radio - Radio Waves Goodbye,,309.0,70.0,129.0,31.0,27.0,,,22.0,,,,
John Macaluso & Union Radio - The Radio Waves Goodbye,,,,,,,44.0,,,17.0,156.0,299.0,214.0


### Loved & Forgotten Albuns

In [6]:
forgotten_df = loved_album[loved_album.loved_tracks>0]
forgotten_df = forgotten_df.sort_values(by=['year','times_played'], ascending=True)
list_of_forgotten = []
for year in years_list:
    df = forgotten_df[forgotten_df.year == year].head(3)
    df = df.artist_album.unique()
    for i in df:
        list_of_forgotten.append(i)
forgotten = loved_album[loved_album.artist_album.isin(list_of_forgotten)]
forgotten.pivot(index='artist_album',columns='year',values='times_played')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
artist_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
America - America,,,,,55.0,110.0,45.0,51.0,44.0,1.0,,,
Asia - Anos 80 - Vol 2 - CD4,,,,,1.0,,,,,,,,
Billy Idol - Anos 80 - Vol 2 - CD2,5.0,1.0,8.0,,,,,,,,,,
Billy Talent - Billy Talent II [UK],40.0,,40.0,24.0,17.0,1.0,,,15.0,,,,
Bon Jovi - Bon Jovi,,,,,,,,,,,,1.0,
Bruce Dickinson - Balls to Picasso,,,,,,1.0,,,3.0,,,,
Duran Duran - Duran Duran,,,,,,,,,,,1.0,,
Eagle Eye Cherry - Desireless,1.0,16.0,18.0,4.0,,,1.0,,,,,,
Eminem - Suicide Squad: The Album,,,,,,,,1.0,,,,,
Frank Zappa - Apostrophe('),,,,,,,,,1.0,18.0,1.0,,


### Loved Tracks

In [7]:
loved_ones = lastfm_df[lastfm_df.loved_track == 1]
loved_ones = loved_ones.groupby(['year','track_name','artist_name']).count().reset_index()
loved_ones = loved_ones[['year','track_name','artist_name','date']]
loved_ones.columns = ['year', 'track', 'artist', 'times_played']
loved_ones['song'] = loved_ones.artist + ' - ' + loved_ones.track
loved_ones = loved_ones.sort_values(by=['year','times_played'], ascending=False)

loved_list = []
for year in years_list:
    df = loved_ones[loved_ones.year == year].head(3)
    df = df.song.unique()
    for i in df:
        loved_list.append(i)

loved_ones_per_year = loved_ones[loved_ones.song.isin(loved_list)]
loved_ones_per_year.pivot(index='song',columns='year',values='times_played')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
song,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AC/DC - Two's Up,,,,,65.0,38.0,,,2.0,,1.0,2.0,
America - A Horse With No Name,,,,,15.0,8.0,7.0,15.0,5.0,1.0,,,
Deep Purple - Lazy,,47.0,28.0,4.0,,6.0,2.0,,2.0,,,,
Genesis - Turn It On Again,,,79.0,19.0,3.0,6.0,1.0,,4.0,1.0,,,
Haken - Canary Yellow,,,,,,,,,,,,40.0,19.0
Haken - The Strain,,,,,,,,,,,,39.0,19.0
Judas Priest - Breaking the Law,,,,15.0,2.0,4.0,12.0,,7.0,12.0,9.0,7.0,3.0
Kaiser Chiefs - I Predict a Riot,,,,,,52.0,4.0,2.0,1.0,4.0,,2.0,
Kenny Loggins - Danger Zone,,,44.0,15.0,4.0,,,,4.0,,,,1.0
Led Zeppelin - In My Time of Dying,,5.0,19.0,8.0,7.0,2.0,1.0,17.0,8.0,3.0,,16.0,8.0


## Top albuns per year:

A pivot table showing the top albuns for each year and their frequencies along the years.

In [8]:
top_albuns = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
top_albuns = top_albuns[['year','album_name','artist_name','date']]
top_albuns.columns = ['year', 'album', 'artist', 'frequency']
top_albuns['artist_and_album'] = top_albuns.artist + ' - ' + top_albuns.album
top_albuns = top_albuns.sort_values(by=['year','frequency'], ascending=False)

albuns_list = []
for year in years_list:
    df = top_albuns[top_albuns.year == year].head(1)
    albuns_list.append(str(df.artist_and_album.values).strip('[').strip(']').strip("'"))
top_albuns_per_year = top_albuns[top_albuns.artist_and_album.isin(albuns_list)]

top_albuns_per_year.pivot(index='artist_and_album',columns='year',values='frequency')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
artist_and_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alan Parsons - Try Anything Once,,,,302.0,43.0,9.0,,,,,,93.0,31.0
Avantasia - The Wicked Symphony,,318.0,146.0,74.0,25.0,,33.0,,27.0,,72.0,86.0,80.0
David Bowie - A Reality Tour,,,,,,,,155.0,71.0,2.0,274.0,12.0,
Gamma Ray - The Best (Of),,,,,,,27.0,4.0,145.0,302.0,437.0,163.0,278.0
Kaiser Chiefs - Employment,,,,,,296.0,26.0,18.0,7.0,,,19.0,
Pain of Salvation - Remedy Lane Re:lived,,,,,,,,,,,,448.0,508.0
Pain of Salvation - The Perfect Element (Part I),,100.0,468.0,56.0,30.0,,,,16.0,,,299.0,12.0
Pink Floyd - Dark Side of the Moon,65.0,18.0,61.0,57.0,147.0,9.0,,,505.0,,,,
Porcupine Tree - Lightbulb Sun,,,,,10.0,10.0,190.0,,144.0,112.0,161.0,20.0,30.0
Rage - From the Cradle to the Stage: 20th Anniversary,,,,,16.0,11.0,,,39.0,377.0,52.0,,


## Top 5 albuns for each year:


In [9]:
top_albuns = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
top_albuns = top_albuns[['year','album_name','artist_name','date']]
top_albuns.columns = ['year', 'album', 'artist', 'frequency']
top_albuns = top_albuns.sort_values(by=['year','frequency'], ascending=False)

df_list = []
for year in years_list:
    df = top_albuns[top_albuns.year == year].head(5)
    df_list.append(df)

top_5_albuns_per_year = pandas.concat(df_list).reset_index()
top_5_albuns_per_year.head(70)

Unnamed: 0,index,year,album,artist,frequency
0,415,2009,Signals,Rush,498
1,178,2009,Final Fantasy - The Black Mages,Nobuo Uematsu,300
2,171,2009,Falling Into Infinity,Dream Theater,282
3,14,2009,"20,000 Watt R.S.L.: Greatest Hits",Midnight Oil,267
4,270,2009,Live (CD 02),Blind Guardian,261
...,...,...,...,...,...
60,10375,2021,Remedy Lane Re:lived,Pain of Salvation,508
61,10213,2021,In Cauda Venenum,Opeth,358
62,9952,2021,Affinity,Haken,307
63,10086,2021,Double LIVE!,Yngwie Malmsteen,297


-- The End --