Welcome to Last.fm Data Visualiser!  
  
Before seeing your scrobbles come to life, go ahead and back up your scrobbles
with **LastFM_Backup.py** first. That downloads all the scrobbles from your Last.fm account into a .csv file.
There are a lot of tools to back up scrobbles into .csv, but I made my own because getting information
on the tags in a scrobble is pretty important to me. This is also the reason why the backup takes a lot
longer than with other tools. But trust me, it's worthwhile! 
  
1. Go to https://www.last.fm/api/account/create to create an API account. Call the application whatever you want, leave the other boxes blank and click submit. And then, **copy the API key**.
2. Open **LastFM_Backup.py** with Notepad. Search for `YOUR_API_KEY` and replace it with the API KEY you obtained from step one.
3. Open command prompt in this directory and type in the command `pip install -r Requirements.txt`
4. To start the backup, open command prompt and type the command `python LastFM_Backup.py`.  
5. Input your Last.fm username and the number of pages to fetch per cycle. (I suggest 1-4; this minimises the number of API calls in each cycle to prevent crashing.)
6. Be patient and wait for the backup to finish.    
If the program crashes, just restart LastFM_Backup.py and it will finish off the backup.  

Once the backup is done,
1. Come back here and enter your Last.fm username in the cell below.
2. Go to https://plot.ly and sign up for an account. This is for plotting the interactive graph in this notebook.
3. Go to your plotly **account settings &rarr; API KEYS** and click **Regenerate Key** to get an API KEY.
4. Enter the plotly information in the cell below. 
5. (optional but **highly recommended**) To view an awesome dashboard of this notebook, go to http://jupyter-dashboards-layout.readthedocs.io/en/latest/getting-started.html and follow the installation and enabling instructions.
6. Run the code and change the view to dashboard preview.

The rest is pretty self-explanatory. Have fun!

In [1]:
# Last.fm account name; the backup file "<username>.csv" is loaded further down.
username = "YOUR LAST.FM USERNAME"
# plot.ly credentials, handed to plotly.tools.set_credentials_file in the
# import cell.
plotly_username = 'PLOTLY USERNAME'
plotly_api_key = 'PLOTLY API KEY'

In [2]:
# Import the pandas library.
import pandas as pd
from scipy.stats import mode
import datetime as dt
from datetime import datetime
from datetime import timedelta
import time
from datetime import date
from collections import Counter
from collections import OrderedDict
import warnings
warnings.filterwarnings(action='ignore')
#plotting
import numpy as np
import matplotlib.pyplot as plt
%pylab inline
#plot
import plotly
plotly.tools.set_credentials_file(username=plotly_username, 
                                  api_key=plotly_api_key)
import plotly.plotly as py
import plotly.graph_objs as go
#display texts
from IPython.display import display, Markdown, Latex
#widgets
import ipywidgets as widgets

Populating the interactive namespace from numpy and matplotlib


In [3]:
# Shared chart palette (colour strings passed to plotly).
color1 = "rgb(0,51,153)"  # "This period" bars and listening-clock wedges
color2 = "rgb(102,204,255)"  # "Previous period" bars
transparent = "rgba(0,0,0,0)"  # fill and outline of the clock's background wedges

In [4]:
# Load the backup CSV. It has no header row and every field is kept as text,
# so the column names are attached by hand afterwards.
scrobbles_df = pd.read_csv("{}.csv".format(username), header=None, dtype=str)
scrobbles_df.columns = [
    "week",
    "time",
    "artist",
    "artist_mbid",
    "track",
    "track_mbid",
    "album",
    "album_mbid",
    "tags",
    "album art",
    "timestamp",
]
# Newest scrobble first. The timestamps are strings at this point, so this is
# a text sort on the "timestamp" column.
scrobbles_df = scrobbles_df.sort_values(by="timestamp", ascending=False)

In [5]:
# Show the first two rows (the newest scrobbles after the descending sort)
# to check that the columns loaded as expected.
print(scrobbles_df.head(2))

                       week                time              artist  \
0  2018-03-30 to 2018-04-05  31 Mar 2018, 13:51           Sam Feldt   
1  2018-03-30 to 2018-04-05  31 Mar 2018, 13:47  Axwell /\ Ingrosso   

  artist_mbid                   track track_mbid                    album  \
0         NaN  Runaways (feat. Teemu)        NaN   Runaways (feat. Teemu)   
1         NaN      More Than You Know        NaN  More Than You Know - EP   

  album_mbid                           tags album art   timestamp  
0        NaN                            NaN       NaN  1522504273  
1        NaN  trap, TrapHouse, electronic,        NaN  1522504070  


In [6]:
# Render the Last.fm logo as a clickable image that links to the user's
# weekly listening report.
logo = "![logo](http://www.grindmodemusic.com/images/709px-lastfm_logosvg-u1359-r_2x.png?crc=211483745)"
url = "https://www.last.fm/user/{}/listening-report/week".format(username)
linked_logo = "[{}]({})".format(logo, url)
display(Markdown(linked_logo))

[![logo](http://www.grindmodemusic.com/images/709px-lastfm_logosvg-u1359-r_2x.png?crc=211483745)](https://www.last.fm/user/ongmk/listening-report/week)

In [7]:
# Date-range controls: the last day of the period and its length in days.
# NOTE(review): `datetime.date.today()` needs `datetime` to be the module here,
# yet the import cell also runs `from datetime import datetime` -- presumably
# `%pylab inline` rebinds the name afterwards; confirm.
end_date = widgets.DatePicker(value=datetime.date.today())
# The upper bound of the period is the number of non-missing timestamps.
period = widgets.BoundedIntText(value=7,min=1,
            max=scrobbles_df["timestamp"].count(),step=1,
            description='Period',disabled=False)
ui = widgets.HBox([end_date,period])
def f(a,b):
    # Publish the values received from the widgets as globals for the cells
    # below. Note that this rebinds `end_date` from the DatePicker widget to
    # whatever value is passed in as `a`, and creates `length` from `b`.
    global end_date, length
    end_date = a
    length = b

# Wire `f` to the two widgets ('a' <- end_date, 'b' <- period).
out = widgets.interactive_output(f, {'a': end_date, 'b': period})

display(ui, out)

A Jupyter Widget

A Jupyter Widget

# Last.fm summary

In [21]:
def get_week_strings(input_date=None, period=7):
    """Return the timestamp bounds of the selected period and the one before.

    input_date -- last day (inclusive) of the period to visualise. Defaults
                  to today, evaluated on every call rather than once when
                  the function is defined.
    period     -- number of days to visualise (default 7).

    Displays the covered date range as Markdown and returns a pair of lists,
    ``[start, end]`` for the selected period and ``[start, end]`` for the
    equally long period immediately before it. All four values are
    local-time Unix timestamps produced by ``time.mktime``; ``end`` is the
    midnight that follows the last day, so it is meant as an exclusive bound.
    """
    if input_date is None:
        input_date = dt.date.today()
    span = timedelta(days=period)
    # Midnight at the start of the day *after* input_date.
    end_date_at_00 = (dt.datetime.combine(input_date, dt.datetime.min.time())
                      + timedelta(days=1))
    start_datetime = end_date_at_00 - span
    previous_start_datetime = start_datetime - span

    def to_timestamp(moment):
        # Interpret the naive datetime in the machine's local timezone.
        return time.mktime(moment.timetuple())

    display(Markdown('from **{}** to **{}**'.format(
        start_datetime.strftime('%Y-%m-%d'),
        (end_date_at_00 - timedelta(days=1)).strftime('%Y-%m-%d'))))
    return ([to_timestamp(start_datetime), to_timestamp(end_date_at_00)],
            [to_timestamp(previous_start_datetime),
             to_timestamp(start_datetime)])


# `end_date` and `length` are the globals set by the widget callback above.
# From here on `period` holds the [start, end] timestamps of the selected
# period (it no longer refers to the widget of the same name).
period, previous_period = get_week_strings(end_date, length)

from **2018-03-25** to **2018-03-31**

In [9]:
# Slice the scrobbles into the selected period and the equally long period
# before it. `period` / `previous_period` are [start, end] Unix timestamps,
# with the end bound exclusive. The text timestamps are converted to integers
# once and reused for both masks.
_scrobble_timestamps = scrobbles_df["timestamp"].astype(int)
week_df = scrobbles_df.loc[
    (_scrobble_timestamps >= period[0]) & (_scrobble_timestamps < period[1])]
week_before_df = scrobbles_df.loc[
    (_scrobble_timestamps >= previous_period[0])
    & (_scrobble_timestamps < previous_period[1])]

In [10]:
def get_scrobble_per_day(week_df):
    """Count the scrobbles in ``week_df`` per day of the week.

    The "time" column is parsed with the format ``'%d %b %Y, %H:%M'``.
    Returns an OrderedDict mapping the weekday abbreviation to its count,
    ordered Fri, Sat, Sun, Mon, Tue, Wed, Thu. Days without scrobbles are
    present with a count of 0.
    """
    # Position in this tuple matches datetime.weekday() (Monday == 0).
    weekday_names = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    tally = dict.fromkeys(weekday_names, 0)
    for _, scrobble in week_df.iterrows():
        played_at = dt.datetime.strptime(scrobble["time"], '%d %b %Y, %H:%M')
        tally[weekday_names[played_at.weekday()]] += 1
    display_order = ("Fri", "Sat", "Sun", "Mon", "Tue", "Wed", "Thu")
    return OrderedDict((name, tally[name]) for name in display_order)

## Scrobbles each day

In [11]:
def plot_weekday_chart(week_df,week_before_df):
    """Plot scrobbles per weekday for two periods as grouped bars.

    Both frames are summarised with get_scrobble_per_day, whose result is
    ordered Fri..Thu to match the x-axis labels used here. The previous
    period is drawn first, then the current one. Returns whatever
    ``py.iplot`` returns for the figure named 'WeekdayChart'.
    """
    weekday_labels = ["Fri", "Sat", "Sun", "Mon", "Tue", "Wed", "Thu"]
    previous_counts = get_scrobble_per_day(week_before_df)
    current_counts = get_scrobble_per_day(week_df)
    bars = [
        go.Bar(x=weekday_labels, y=list(previous_counts.values()),
               name='Previous period', marker=dict(color=color2)),
        go.Bar(x=weekday_labels, y=list(current_counts.values()),
               name='This period', marker=dict(color=color1)),
    ]
    chart_margin = go.Margin(l=0, r=0, b=50, t=0, pad=4)
    chart_layout = go.Layout(barmode='group', width=800, height=300,
                             margin=chart_margin)
    figure = go.Figure(data=bars, layout=chart_layout)
    return py.iplot(figure, filename='WeekdayChart')
plot_weekday_chart(week_df,week_before_df)

## Tops:

In [12]:
def get_top(week_df):
    """Display the most-played artist, album and track in ``week_df``.

    For each of the three columns the most frequent value and its number of
    occurrences are taken from ``scipy.stats.mode`` and shown as Markdown,
    e.g. ``Top artist: **Name** (98)``.
    """
    for item in ["artist", "album", "track"]:
        # mode() returns (modes, counts); compute it once per column.
        result = mode(week_df[item])
        item_mode = result[0][0]
        count = result[1][0]
        # Under Python 2 the CSV values are UTF-8 byte strings and must be
        # decoded before display; under Python 3 they already are text.
        if isinstance(item_mode, bytes):
            item_mode = item_mode.decode("utf-8")
        display(Markdown(
            u"Top {}: **{}** ({})".format(item, item_mode, count)))
get_top(week_df)

Top artist: **Jon Bellion** (98)

Top album: **The Human Condition** (54)

Top track: **Maybe IDK** (8)

In [13]:
def _listening_seconds(timestamps, max_gap=300, track_seconds=180):
    """Estimate total listening time in seconds from scrobble timestamps.

    Scrobbles are grouped into sessions: consecutive scrobbles at most
    ``max_gap`` seconds apart belong to the same session. Each session
    contributes the span between its first and last scrobble plus
    ``track_seconds`` for the last track, whose duration is not recorded.
    """
    ordered = sorted((int(stamp) for stamp in timestamps), reverse=True)
    if not ordered:
        return 0
    total = 0
    session_newest = ordered[0]
    for current, older in zip(ordered, ordered[1:]):
        if current - older > max_gap:
            # `current` is the oldest scrobble of the session being closed.
            total += session_newest - current + track_seconds
            session_newest = older
    # Close the final (oldest) session as well.
    total += session_newest - ordered[-1] + track_seconds
    return total


def get_listening_time(week_df, max_gap=300, track_seconds=180):
    """Display the scrobble count and estimated listening time of ``week_df``.

    week_df       -- scrobbles to summarise; its "timestamp" column holds
                     Unix timestamps (missing values are ignored).
    max_gap       -- largest pause in seconds between two scrobbles that
                     still counts as continuous listening.
    track_seconds -- assumed length of a song in seconds, added once per
                     session for its last track.
    """
    total_time = _listening_seconds(week_df["timestamp"].dropna(),
                                    max_gap, track_seconds)
    # Whole hours only, split into days and remaining hours.
    day, hour = divmod(total_time // 3600, 24)
    display(Markdown("### You had"))
    total_scrobbles = week_df['timestamp'].count()
    display(Markdown("# *_{}_* scrobbles".format(str(total_scrobbles))))
    display(Markdown("### Or "))
    if day == 0:
        display(Markdown("# **{}** hours".format(hour)))
    else:
        display(Markdown("# **{}** day, **{}** hours".format(day,hour)))
    display(Markdown("### of awesomeness!!"))
get_listening_time(week_df)

### You had

# *_1122_* scrobbles

### Or 

# **2** day, **17** hours

### of awesomeness!!

## Listening clock

In [14]:
def get_hour_dict(week_df):
    """Count the scrobbles in ``week_df`` per hour of the day.

    The "time" column is parsed with the format ``'%d %b %Y, %H:%M'``.
    Returns a dict with one key for every hour 0..23 mapping to the number
    of scrobbles that started in that hour (0 when there were none).
    """
    per_hour = dict.fromkeys(range(0, 24), 0)
    for _, scrobble in week_df.iterrows():
        played_at = dt.datetime.strptime(scrobble["time"], '%d %b %Y, %H:%M')
        per_hour[played_at.hour] += 1
    return per_hour

In [15]:
def plot_listening_clock(week_df):
    """Plot scrobbles per hour of the day as a 24-hour polar "clock".

    The hourly counts come from get_hour_dict. Every hour gets two traces
    sharing the same angles: a transparent one reaching the largest hourly
    count, and a filled one (color1, white outline) reaching that hour's own
    count. Both are named "<count> scrobbles". Returns whatever ``py.iplot``
    returns for the figure named 'Listening clock'.
    """
    per_hour = list(get_hour_dict(week_df).items())
    hours_strings = [hour for hour, _ in per_hour]
    theta = [hour*360/24 for hour, _ in per_hour]
    counts = [count for _, count in per_hour]
    max_count = max(counts)

    data = []
    for start_angle, count in zip(theta, counts):
        # Wedge from the centre, spanning 180/12 degrees from start_angle.
        angle = [0, start_angle, start_angle + 180/12, 0]
        label = str(count)+" scrobbles"
        data.append(go.Scatterpolar(
            r=[0, max_count, max_count], theta=angle,
            mode='lines', fill='toself', fillcolor=transparent,
            line=dict(color=transparent), name=label))
        data.append(go.Scatterpolar(
            r=[0, count, count, 0], theta=angle,
            mode='lines', fill='toself', fillcolor=color1,
            line=dict(color="white"), name=label))

    radial_axis = dict(tickfont=dict(size=8), showgrid=False, visible=False)
    # One tick per hour, with 0 at the top and hours running clockwise.
    angular_axis = dict(tickfont=dict(size=14), tickmode="array",
                        tickvals=theta, ticktext=hours_strings,
                        rotation=90, direction="clockwise", showline=False)
    layout = go.Layout(showlegend=False,
                       polar=dict(radialaxis=radial_axis,
                                  angularaxis=angular_axis),
                       width=400, height=300,
                       margin=go.Margin(l=25, r=100, b=25, t=25, pad=4))
    figure = go.Figure(data=data, layout=layout)
    return py.iplot(figure, filename='Listening clock')
plot_listening_clock(week_df)

## Discoveries

In [16]:
def get_new_items(week_df,overall_df):
    """Display how many artists, tracks and albums were new in ``week_df``.

    week_df    -- scrobbles of the period being summarised.
    overall_df -- the full scrobble history, used to decide what was already
                  known before the period started.

    A value counts as a discovery when it does not occur in ``overall_df``
    before the earliest scrobble of ``week_df``. For each of artist, track
    and album this shows the number of discoveries, their share of the
    distinct values in the period, and (when there is at least one) the most
    played discovery with its play count.
    """
    def as_text(value):
        # Python 2 reads the CSV as UTF-8 byte strings; decode for display.
        return value.decode("utf-8") if isinstance(value, bytes) else value

    if len(week_df):
        # Compare timestamps numerically, not as text.
        first_scrobble_ts = week_df["timestamp"].astype(int).min()
        old_df = overall_df.loc[
            overall_df["timestamp"].astype(int) < first_scrobble_ts]
    else:
        old_df = overall_df.iloc[0:0]

    for item in ["artist", "track", "album"]:
        tot_items = len(week_df[item].unique())
        # Set lookup instead of scanning the whole history for every row.
        known_items = set(old_df[item].dropna())
        new_items = Counter(
            value for value in week_df[item].dropna()
            if value not in ("", "nan") and value not in known_items)
        num_new_items = len(new_items)
        percentage = num_new_items * 100 // tot_items if tot_items else 0
        display(Markdown("**{}** new {}s were discovered. ({}%)".format(
            num_new_items,item,percentage)))
        if not new_items:
            # Nothing new for this category, so there is no top discovery.
            continue
        top_key, top_count = new_items.most_common(1)[0]
        display(Markdown(
            u"**" + as_text(top_key)
            + u"** was your top {} discovery. ({})".format(item,top_count)))
get_new_items(week_df,scrobbles_df)

**81** new artists were discovered. (33%)

**Blackbear** was your top artist discovery. (20)

**457** new tracks were discovered. (61%)

**Maybe IDK** was your top track discovery. (8)

**239** new albums were discovered. (51%)

**The Definition** was your top album discovery. (17)

# Overall summary

## Top artists chart

## All time top artists are:

In [17]:
# Ten most-scrobbled artists as a DataFrame with the columns "artist" and
# "count", highest count first. GroupBy.size() replaces the dict-renaming
# form of .agg(), which newer pandas releases reject.
top_artists = (scrobbles_df.groupby("artist").size()
               .sort_values(ascending=False)
               .head(10)
               .reset_index(name="count"))
# Announce the top five by name.
for artist_name in top_artists["artist"].iloc[:5]:
    display(Markdown("**"+artist_name+"**"))

**Imagine Dragons**

**OneRepublic**

**MisterWives**

**Halsey**

**Coldplay**

In [18]:
# One cumulative-scrobble line per top artist: x is the moment of each
# scrobble (oldest first), y is the running number of scrobbles so far.
data1 = []
for artist_name in top_artists['artist']:
    artist_df = scrobbles_df.loc[scrobbles_df["artist"] == artist_name]
    artist_df = artist_df.sort_values(by=['timestamp'], ascending=True)
    times = [datetime.datetime.fromtimestamp(int(stamp))
             for stamp in artist_df["timestamp"]]
    counts = list(range(1, len(times) + 1))
    data1.append(go.Scatter(x=times, y=counts, name=artist_name,
                            hoverinfo="y"))
chart_margin = go.Margin(l=0, r=100, b=25, t=25, pad=4)
layout = go.Layout(width=800, height=400, margin=chart_margin)
fig = go.Figure(data=data1, layout=layout)
py.iplot(fig, filename='Top Artists Graph')

## All time top tags are:

In [19]:
# Collect every tag of every scrobble. The "tags" column is a comma-separated
# string; scrobbles without tags hold a missing value and are skipped
# explicitly instead of relying on a catch-all exception handler.
tag_list = []
for tags_string in scrobbles_df["tags"]:
    if pd.isnull(tags_string):
        continue
    for tag in tags_string.split(','):
        tag = tag.strip()
        if tag != "":
            tag_list.append(tag)
# (tag, count) pairs of the ten most frequent tags; the first five are shown.
top_tags = Counter(tag_list).most_common()[:10]
top_five = top_tags[:5]
for tag in top_five:
    display(Markdown("**"+tag[0]+"**"))

**pop**

**indie**

**alternative**

**rock**

**electronic**

## Top tags chart

In [20]:
# Cumulative usage of each top tag over time. For every scrobble (oldest
# first) exactly one x value is recorded and, per top tag, exactly one
# running total, so each trace has as many y values as there are x values.
top_tag_names = [tag[0] for tag in top_tags]
counts_dict = dict((name, []) for name in top_tag_names)  # running totals per scrobble
count_dict = dict.fromkeys(top_tag_names, 0)              # current total per tag
times = []

reverse_df = scrobbles_df.sort_values(by=['timestamp'],ascending=True)
for timestamp, tags_string in zip(reverse_df["timestamp"],
                                  reverse_df["tags"]):
    if pd.isnull(timestamp):
        continue
    times.append(dt.datetime.fromtimestamp(int(timestamp)))
    # Scrobbles without tags still get a point; the totals just stay flat.
    if pd.isnull(tags_string):
        tags_on_row = Counter()
    else:
        tags_on_row = Counter(s.strip() for s in tags_string.split(','))
    for name in top_tag_names:
        count_dict[name] += tags_on_row[name]
        counts_dict[name].append(count_dict[name])
data2 = []
for name in top_tag_names:
    trace = go.Scatter(x = times, y = counts_dict[name],name=name,
                       hoverinfo="y")
    data2.append(trace)
layout = go.Layout(width=800, height=400,
                   margin=go.Margin(l=0, r=100, b=25, t=25, pad=4))
fig = go.Figure(data=data2, layout=layout)
py.iplot(fig, filename='Top Tags Graph')

The draw time for this plot will be slow for all clients.
