# Analyzing LastFM API


## Importing historic data from LastFM API:

In [1]:
import lastfm
lastfm.import_historic_data(file_path='lastfm_data', initial_year=2019,end_year=2021)

saved lastfm_data2019.csv
saved lastfm_data2020.csv
saved lastfm_data2021.csv


'files saved correctly at lastfm_data'

## Reading imported files:

In [2]:
import pandas 
import glob
import os

path = os.getcwd()
lasffm_files = glob.glob(path + '/*.csv')
df_files = [pandas.read_csv(file, index_col=0) for file in lasffm_files]
lastfm_df = pandas.concat(df_files)
# adding datepart information
lastfm_df = lastfm_df[lastfm_df.date_uts.notnull()] # dropping null dates
lastfm_df['complete_date'] = pandas.to_datetime(lastfm_df.date_uts,dayfirst=True,unit='s')
lastfm_df['year'] = lastfm_df.complete_date.dt.year.astype(int)
lastfm_df['month'] = lastfm_df.complete_date.dt.month.astype(int)

lastfm_df.head(10)

Unnamed: 0,artist_name,album_name,date_uts,date,track_name,loved_track,complete_date,year,month
0,Haken,Virus,1609025458,2020-12-26 20:30:58,Carousel,0,2020-12-26 23:30:58,2020,12
1,Haken,Virus,1609025056,2020-12-26 20:24:16,Invasion,0,2020-12-26 23:24:16,2020,12
2,Haken,Virus,1609024695,2020-12-26 20:18:15,Prosthetic,0,2020-12-26 23:18:15,2020,12
3,Pain of Salvation,Remedy Lane Re:lived,1608749711,2020-12-23 15:55:11,Beyond the Pale (Live),0,2020-12-23 18:55:11,2020,12
4,Pain of Salvation,Remedy Lane Re:lived,1608749419,2020-12-23 15:50:19,Second Love (Live),0,2020-12-23 18:50:19,2020,12
5,Pain of Salvation,Remedy Lane Re:lived,1608749078,2020-12-23 15:44:38,Waking Every God (Live),0,2020-12-23 18:44:38,2020,12
6,Pain of Salvation,Remedy Lane Re:lived,1608748664,2020-12-23 15:37:44,Dryad Of The Woods (Live),0,2020-12-23 18:37:44,2020,12
7,Pain of Salvation,Remedy Lane Re:lived,1608748387,2020-12-23 15:33:07,Chain Sling (Live),0,2020-12-23 18:33:07,2020,12
8,Pain of Salvation,Remedy Lane Re:lived,1608747955,2020-12-23 15:25:55,Rope Ends (Live),0,2020-12-23 18:25:55,2020,12
9,Pain of Salvation,Remedy Lane Re:lived,1608747668,2020-12-23 15:21:08,Undertow (live),0,2020-12-23 18:21:08,2020,12


## Top albuns per year:

A pivot table showing the top albuns for each year and their frequencies along the years.

In [6]:
top_albuns = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
top_albuns = top_albuns[['year','album_name','artist_name','date']]
top_albuns.columns = ['year', 'album', 'artist', 'frequency']
top_albuns['artist_and_album'] = top_albuns.artist + ' - ' + top_albuns.album
top_albuns = top_albuns.sort_values(by=['year','frequency'], ascending=False)

albuns_list = []
for year in range(2009,2021):
    df = top_albuns[top_albuns.year == year].head(1)
    albuns_list.append(str(df.artist_and_album.values).strip('[').strip(']').strip("'"))
top_albuns_per_year = top_albuns[top_albuns.artist_and_album.isin(albuns_list)]

top_albuns_per_year.pivot(index='artist_and_album',columns='year',values='frequency')

year,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021
artist_and_album,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Alan Parsons - Try Anything Once,,,,302.0,43.0,9.0,,,,,,93.0,31.0
Avantasia - The Wicked Symphony,,318.0,146.0,74.0,25.0,,33.0,,27.0,,72.0,86.0,80.0
David Bowie - A Reality Tour,,,,,,,,155.0,71.0,2.0,274.0,12.0,
Gamma Ray - The Best (Of),,,,,,,27.0,4.0,145.0,302.0,437.0,163.0,278.0
Kaiser Chiefs - Employment,,,,,,296.0,26.0,18.0,7.0,,,19.0,
Pain of Salvation - Remedy Lane Re:lived,,,,,,,,,,,,448.0,495.0
Pain of Salvation - The Perfect Element (Part I),,100.0,468.0,56.0,30.0,,,,16.0,,,299.0,12.0
Pink Floyd - Dark Side of the Moon,65.0,18.0,61.0,57.0,147.0,9.0,,,505.0,,,,
Porcupine Tree - Lightbulb Sun,,,,,10.0,10.0,190.0,,144.0,112.0,161.0,20.0,30.0
Rage - From the Cradle to the Stage: 20th Anniversary,,,,,16.0,11.0,,,39.0,377.0,52.0,,


## Top 5 albuns for each year:


In [9]:
top_albuns = lastfm_df.groupby(['year','album_name','artist_name']).count().reset_index()
top_albuns = top_albuns[['year','album_name','artist_name','date']]
top_albuns.columns = ['year', 'album', 'artist', 'frequency']
top_albuns = top_albuns.sort_values(by=['year','frequency'], ascending=False)

df_list = []
for year in range(2009,2021):
    df = top_albuns[top_albuns.year == year].head(5)
    df_list.append(df)

top_5_albuns_per_year = pandas.concat(df_list).reset_index()
top_5_albuns_per_year.head(70)

Unnamed: 0,index,year,album,artist,frequency
0,415,2009,Signals,Rush,498
1,178,2009,Final Fantasy - The Black Mages,Nobuo Uematsu,300
2,171,2009,Falling Into Infinity,Dream Theater,282
3,14,2009,"20,000 Watt R.S.L.: Greatest Hits",Midnight Oil,267
4,270,2009,Live (CD 02),Blind Guardian,261
5,1317,2010,The Wicked Symphony,Avantasia,318
6,1126,2010,Radio Waves Goodbye,John Macaluso & Union Radio,309
7,651,2010,Angel of Babylon,Avantasia,288
8,1298,2010,The Second Death Of Pain Of Salvation,Pain of Salvation,245
9,1361,2010,V,Symphony X,240
