# Analyzing LastFM API


## Importing historic data from LastFM API:

In [1]:
import lastfm
lastfm.import_historic_data(file_path='lastfm_data', initial_year=2019,end_year=2021)

saved lastfm_data2019.csv
saved lastfm_data2020.csv
saved lastfm_data2021.csv


'files saved correctly at lastfm_data'

## Reading imported files:

In [1]:
import pandas 
import glob
import os

path = os.getcwd()
lasffm_files = glob.glob(path + '/*.csv')
df_files = [pandas.read_csv(file, index_col=0) for file in lasffm_files]
lastfm_df = pandas.concat(df_files)
# adding datepart information
lastfm_df = lastfm_df[lastfm_df.date_uts.notnull()] # dropping null dates
lastfm_df['complete_date'] = pandas.to_datetime(lastfm_df.date_uts,dayfirst=True,unit='s')
lastfm_df['simple_date'] = lastfm_df.complete_date.dt.date
lastfm_df['simple_time'] = lastfm_df.complete_date.dt.time
lastfm_df['year'] = lastfm_df.complete_date.dt.year.astype(int)
lastfm_df['month'] = lastfm_df.complete_date.dt.month.astype(int)

lastfm_df.head(10)

Unnamed: 0,artist_name,album_name,date_uts,date,track_name,loved_track,complete_date,simple_date,simple_time,year,month
0,Haken,Virus,1609025458,2020-12-26 20:30:58,Carousel,0,2020-12-26 23:30:58,2020-12-26,23:30:58,2020,12
1,Haken,Virus,1609025056,2020-12-26 20:24:16,Invasion,0,2020-12-26 23:24:16,2020-12-26,23:24:16,2020,12
2,Haken,Virus,1609024695,2020-12-26 20:18:15,Prosthetic,0,2020-12-26 23:18:15,2020-12-26,23:18:15,2020,12
3,Pain of Salvation,Remedy Lane Re:lived,1608749711,2020-12-23 15:55:11,Beyond the Pale (Live),0,2020-12-23 18:55:11,2020-12-23,18:55:11,2020,12
4,Pain of Salvation,Remedy Lane Re:lived,1608749419,2020-12-23 15:50:19,Second Love (Live),0,2020-12-23 18:50:19,2020-12-23,18:50:19,2020,12
5,Pain of Salvation,Remedy Lane Re:lived,1608749078,2020-12-23 15:44:38,Waking Every God (Live),0,2020-12-23 18:44:38,2020-12-23,18:44:38,2020,12
6,Pain of Salvation,Remedy Lane Re:lived,1608748664,2020-12-23 15:37:44,Dryad Of The Woods (Live),0,2020-12-23 18:37:44,2020-12-23,18:37:44,2020,12
7,Pain of Salvation,Remedy Lane Re:lived,1608748387,2020-12-23 15:33:07,Chain Sling (Live),0,2020-12-23 18:33:07,2020-12-23,18:33:07,2020,12
8,Pain of Salvation,Remedy Lane Re:lived,1608747955,2020-12-23 15:25:55,Rope Ends (Live),0,2020-12-23 18:25:55,2020-12-23,18:25:55,2020,12
9,Pain of Salvation,Remedy Lane Re:lived,1608747668,2020-12-23 15:21:08,Undertow (live),0,2020-12-23 18:21:08,2020-12-23,18:21:08,2020,12


In [2]:
years_list = lastfm_df.year.unique()
years_list.sort()
years_list


array([2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019,
       2020, 2021])

## Given a date, top albuns from the past:

In [3]:
# import date, datetime
from datetime import datetime, date
import time

today = datetime.today()
today_day = today.day
today_month = today.month
today_year = today.year


df_list = []
for year in years_list:
    day_filter = date(year,today_month,today_day)
    df = lastfm_df[lastfm_df.simple_date == day_filter]
    df = df.groupby(['year','album_name','artist_name']).count().reset_index()
    df = df[['year','album_name','artist_name','date']]
    df.columns = ['year', 'album', 'artist', 'frequency']
    df = df.head(1)
    df_list.append(df)

in_the_past = pandas.concat(df_list)
in_the_past.head(20)

Unnamed: 0,year,album,artist,frequency
0,2009,Atom Heart Mother,Pink Floyd,7
0,2010,Communiqué,Dire Straits,14
0,2011,Crimson Thunder,HammerFall,6
0,2012,1962-1966 (CD 02),The Beatles,2
0,2013,... Best II,The Smiths,13
0,2016,Live At Budokan,Dream Theater,6
0,2018,Actual Fantasy Revisited,Ayreon,6
0,2019,Iconoclast,Symphony X,12
0,2020,Didn't It Rain (Deluxe),Hugh Laurie,16


## Some loved tracks

In [4]:
loved_album = lastfm_df.groupby(['year','album_name','artist_name']).sum().reset_index()
loved_album = loved_album[['year','album_name','artist_name','loved_track']]
loved_album.columns = ['year', 'album', 'artist', 'loved_tracks']
played_album = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
played_album = played_album[['year','album_name','artist_name','date']]
played_album.columns = ['year', 'album', 'artist', 'times_played']
loved_album = loved_album.merge(played_album, how='inner', on=['year','album','artist'])
loved_album['artist_album'] = loved_album.artist + ' - ' + loved_album.album
loved_album = loved_album.sort_values(by=['year','loved_tracks'], ascending=False)
loved_album.head()

Unnamed: 0,year,album,artist,loved_tracks,times_played,artist_album
10417,2021,The Radio Waves Goodbye,John Macaluso & Union Radio,47,201,John Macaluso & Union Radio - The Radio Waves ...
10457,2021,Unweaving the Rainbow,Frameshift,44,55,Frameshift - Unweaving the Rainbow
10329,2021,Seventh Son of a Seventh Son,Iron Maiden,40,64,Iron Maiden - Seventh Son of a Seventh Son
10309,2021,Remedy Lane Re:lived,Pain of Salvation,38,495,Pain of Salvation - Remedy Lane Re:lived
9940,2021,A Momentary Lapse of Reason,Pink Floyd,29,61,Pink Floyd - A Momentary Lapse of Reason


### Loved & Appreciated Albuns

In [16]:
appreciated_df = loved_album[loved_album.loved_tracks>0]
appreciated_df = appreciated_df.sort_values(by=['year','times_played'], ascending=False)
list_of_appreciated = []
for year in years_list:
    df = appreciated_df[appreciated_df.year == year].head(3)
    df = df.artist_album.unique()
    for i in df:
        list_of_appreciated.append(i)

appreciated = loved_album[loved_album.artist_album.isin(list_of_appreciated)]
appreciated = appreciated.sort_values(by='times_played',ascending=False)

appreciated.pivot(index='artist_album',columns='year',values='times_played')


year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
artist_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AC/DC - Black Ice,,,,,230.0,32.0,,,17.0,24.0,,1.0,
AC/DC - Blow Up Your Video,,,,,131.0,198.0,,,11.0,,11.0,19.0,
Alan Parsons Project - Freudiana,,,,28.0,254.0,214.0,,,25.0,,,,
Blind Guardian - Live,218.0,94.0,41.0,20.0,,105.0,59.0,110.0,130.0,112.0,213.0,34.0,103.0
Buffalo Springfield - Buffalo Springfield,,,,61.0,1.0,12.0,12.0,89.0,11.0,,,28.0,
Dream Theater - Falling Into Infinity,282.0,31.0,151.0,43.0,,,,,4.0,11.0,86.0,69.0,21.0
Genesis - Turn It on Again: The Hits,,,308.0,90.0,,,7.0,,24.0,,,,
Haken - Virus,,,,,,,,,,,,405.0,132.0
John Macaluso & Union Radio - Radio Waves Goodbye,,309.0,70.0,129.0,31.0,27.0,,,22.0,,,,
John Macaluso & Union Radio - The Radio Waves Goodbye,,,,,,,44.0,,,17.0,156.0,299.0,201.0


### Loved & Forgotten Albuns

In [7]:
forgotten_df = loved_album[loved_album.loved_tracks>0]
forgotten_df = forgotten_df.sort_values(by=['year','times_played'], ascending=True)
list_of_forgotten = []
for year in years_list:
    df = forgotten_df[forgotten_df.year == year].head(3)
    df = str(df.artist_album.values).strip('[').strip(']').strip("'")
    list_of_forgotten.append(df)
forgotten = loved_album[loved_album.artist_album.isin(list_of_forgotten)]
forgotten.pivot(index='artist_album',columns='year',values='times_played')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2020,2021
artist_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
George Bruns - A Musical History of Disneyland Disc 2,,,,,2.0,1.0,,,,,,
INXS - Donnie Darko (Soundtrack & Score),,22.0,9.0,3.0,4.0,2.0,,,1.0,,,
"Marillion - A Singles Collection 1982-1992: Six Of One, Half-Dozen Of The Other",,,,,,,,,,2.0,1.0,
Men at Work - ANOS 80 - vol 2 - CD3,1.0,,,,,,,,,,,
Men at Work - Anos 80 - CD1,1.0,,1.0,,,,,,,,,
Michael Sembello - Flashdance Original Soundtrack from the Motion Picture,,,,,,,,1.0,,,,
Pink Floyd - A Momentary Lapse of Reason,94.0,79.0,159.0,67.0,120.0,24.0,,,20.0,1.0,40.0,61.0
Queen - A Night At The Opera (2011 Remaster),,,,,,5.0,1.0,,1.0,2.0,,
The Beatles - 1967-1970 Blue Album CD2,19.0,1.0,6.0,13.0,11.0,,,,,,,
The Who - A Quick One,55.0,35.0,71.0,64.0,1.0,,,,,,,


In [None]:
# import date, datetime
from datetime import datetime, date
import time

today = datetime.today()
today_day = today.day
today_month = today.month
today_year = today.year


df_list = []
for year in years_list:
    day_filter = date(year,today_month,today_day)
    df = lastfm_df[lastfm_df.simple_date == day_filter]
    df = df.groupby(['year','album_name','artist_name']).count().reset_index()
    df = df[['year','album_name','artist_name','date']]
    df.columns = ['year', 'album', 'artist', 'frequency']
    df = df.head(1)
    df_list.append(df)

in_the_past = pandas.concat(df_list)
in_the_past.head(20)

In [26]:
loved_album = lastfm_df.groupby(['year','album_name','artist_name']).sum().reset_index()
loved_album = loved_album[['year','album_name','artist_name','loved_track']]
loved_album.columns = ['year', 'album', 'artist', 'loved_tracks']
played_album = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
played_album = played_album[['year','album_name','artist_name','date']]
played_album.columns = ['year', 'album', 'artist', 'times_played']
loved_album = loved_album.merge(played_album, how='inner', on=['year','album','artist'])
loved_album['artist_album'] = loved_album.artist + ' - ' + loved_album.album
# loved_album[''] = loved_album.artist + ' - ' + loved_album.album

loved_album = loved_album.sort_values(by=['year','loved_tracks'], ascending=False)

loved_album.head(20)
# loved_album.pivot(index='artist_album',columns='year',values='times_played')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
artist_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
'Til Tuesday - Voices Carry,,,,,,,,,2.0,1.0,,,
1000mods - Super Van Vacation,,,,,,,,,3.0,,,,
1000mods - Vultures,,,,,,,,,1.0,,,,
10cc - The Complete UK Recordings,,,,,,,,,,,3.0,,
2Cellos - 2Cellos,,,,,,,7.0,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
will.i.am - Music from Baz Luhrmann's Film The Great Gatsby (Deluxe Edition),,,,,,,,,,1.0,,,
Édith Piaf - Edith Piaf - All the Best,,,,,,,,16.0,,,,,
Édouard Lalo - Lalo: Symphonie espagnole - Bruch: Violin Concerto,,,,,,,,,2.0,,,,
Édouard Lalo - Tchaikovsky: Violin Concerto - Lalo: Symphonie espagnole,,,,,,,,,1.0,,,,


In [6]:
loved_ones = lastfm_df[lastfm_df.loved_track == 1]
loved_ones = loved_ones.groupby(['year','track_name','artist_name']).count().reset_index()
loved_ones = loved_ones[['year','track_name','artist_name','date']]
loved_ones.columns = ['year', 'track', 'artist', 'frequency']
loved_ones['song'] = loved_ones.artist + ' - ' + loved_ones.track
loved_ones = loved_ones.sort_values(by=['year','frequency'], ascending=False)
loved_ones.head()

Unnamed: 0,year,track,artist,frequency,song
3368,2021,A Trace Of Blood (Live),Pain of Salvation,38,Pain of Salvation - A Trace Of Blood (Live)
3495,2021,Soul in Your Mind,John Macaluso & Union Radio,17,John Macaluso & Union Radio - Soul in Your Mind
3501,2021,T-34,John Macaluso & Union Radio,17,John Macaluso & Union Radio - T-34
3474,2021,Pretzel,John Macaluso & Union Radio,16,John Macaluso & Union Radio - Pretzel
3388,2021,Canary Yellow,Haken,15,Haken - Canary Yellow


In [None]:
top_albuns = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
top_albuns = top_albuns[['year','album_name','artist_name','date']]
top_albuns.columns = ['year', 'album', 'artist', 'frequency']
top_albuns['artist_and_album'] = top_albuns.artist + ' - ' + top_albuns.album
top_albuns = top_albuns.sort_values(by=['year','frequency'], ascending=False)

albuns_list = []
for year in years_list:
    df = top_albuns[top_albuns.year == year].head(1)
    albuns_list.append(str(df.artist_and_album.values).strip('[').strip(']').strip("'"))
top_albuns_per_year = top_albuns[top_albuns.artist_and_album.isin(albuns_list)]

top_albuns_per_year.pivot(index='artist_and_album',columns='year',values='frequency')

## Top albuns per year:

A pivot table showing the top albuns for each year and their frequencies along the years.

In [6]:
top_albuns = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
top_albuns = top_albuns[['year','album_name','artist_name','date']]
top_albuns.columns = ['year', 'album', 'artist', 'frequency']
top_albuns['artist_and_album'] = top_albuns.artist + ' - ' + top_albuns.album
top_albuns = top_albuns.sort_values(by=['year','frequency'], ascending=False)

albuns_list = []
for year in years_list:
    df = top_albuns[top_albuns.year == year].head(1)
    albuns_list.append(str(df.artist_and_album.values).strip('[').strip(']').strip("'"))
top_albuns_per_year = top_albuns[top_albuns.artist_and_album.isin(albuns_list)]

top_albuns_per_year.pivot(index='artist_and_album',columns='year',values='frequency')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
artist_and_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alan Parsons - Try Anything Once,,,,302.0,43.0,9.0,,,,,,93.0,31.0
Avantasia - The Wicked Symphony,,318.0,146.0,74.0,25.0,,33.0,,27.0,,72.0,86.0,80.0
David Bowie - A Reality Tour,,,,,,,,155.0,71.0,2.0,274.0,12.0,
Gamma Ray - The Best (Of),,,,,,,27.0,4.0,145.0,302.0,437.0,163.0,278.0
Kaiser Chiefs - Employment,,,,,,296.0,26.0,18.0,7.0,,,19.0,
Pain of Salvation - Remedy Lane Re:lived,,,,,,,,,,,,448.0,495.0
Pain of Salvation - The Perfect Element (Part I),,100.0,468.0,56.0,30.0,,,,16.0,,,299.0,12.0
Pink Floyd - Dark Side of the Moon,65.0,18.0,61.0,57.0,147.0,9.0,,,505.0,,,,
Porcupine Tree - Lightbulb Sun,,,,,10.0,10.0,190.0,,144.0,112.0,161.0,20.0,30.0
Rage - From the Cradle to the Stage: 20th Anniversary,,,,,16.0,11.0,,,39.0,377.0,52.0,,


## Top 5 albuns for each year:


In [9]:
top_albuns = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
top_albuns = top_albuns[['year','album_name','artist_name','date']]
top_albuns.columns = ['year', 'album', 'artist', 'frequency']
top_albuns = top_albuns.sort_values(by=['year','frequency'], ascending=False)

df_list = []
for year in years_list:
    df = top_albuns[top_albuns.year == year].head(5)
    df_list.append(df)

top_5_albuns_per_year = pandas.concat(df_list).reset_index()
top_5_albuns_per_year.head(70)

Unnamed: 0,index,year,album,artist,frequency
0,415,2009,Signals,Rush,498
1,178,2009,Final Fantasy - The Black Mages,Nobuo Uematsu,300
2,171,2009,Falling Into Infinity,Dream Theater,282
3,14,2009,"20,000 Watt R.S.L.: Greatest Hits",Midnight Oil,267
4,270,2009,Live (CD 02),Blind Guardian,261
5,1317,2010,The Wicked Symphony,Avantasia,318
6,1126,2010,Radio Waves Goodbye,John Macaluso & Union Radio,309
7,651,2010,Angel of Babylon,Avantasia,288
8,1298,2010,The Second Death Of Pain Of Salvation,Pain of Salvation,245
9,1361,2010,V,Symphony X,240
