Welcome to Last.fm Data Visualiser!  
  
Before seeing your scrobbles come to life, go ahead and backup your scrobbles
with **LastFM_Backup.py** first. That downloads all the scrobbles from your Last.fm account into a .csv file.
There are a lot of tools to backup scrobbles into .csv, but I made my own because getting information
on the tags in a scrobble is pretty important to me. This is also the reason why the backup takes a lot
longer than other tools. But trust me, it's worthwhile! 
  
1. Go to https://www.last.fm/api/account/create to create an API account. Call the application whatever you want, leave the other boxes blank and click submit. And then, **copy the API key**.
2. Open **LastFM_Backup.py** with Notepad. Search for `YOUR_API_KEY` and replace it with the API KEY you obtained from step one.
3. Open command prompt in this directory and type in the command `pip install -r Requirements.txt`
4. To start the backup, open command prompt and type the command `python LastFM_Backup.py`.  
5. Input your last.fm username and number of pages to fetch per cycle (I suggest 1-4.This is to minimise the number of calls each cycle to prevent crashing.)
6. Be patient and wait for the backup to finish.    
If the program crashes, just restart the LastFM_Backup.py and it will finish off the backup.  
  
### Make sure you are not scrobbling during the backup, or it will mess up your backup!!!

Once backup is done,
1. Come back to here and enter your Last.fm details in the cell below.
2. Go to https://plot.ly and sign up for an account. This is for plotting the interactive graph in this notebook.
3. Go to your plotly **account settings &rarr; API KEYS** and click **Regererate Key** to get an API KEY.
4. Enter the ploty information in the cell below. 
5. (optional but **highly recommended**) To view an awesome dashboard of this notebook, go to http://jupyter-dashboards-layout.readthedocs.io/en/latest/getting-started.html and follow the installation and enabling instructions.
6. Run the code and change the view to dashboard preview.
![image](image/dashboard.png)

The rest is pretty self explanatory. Have fun!

In [96]:
username = "lastfm_username"
last_api_key = "lastfm_api_key"
plotly_username = 'plotly_username'
plotly_api_key = 'plotly_api_key'

In [2]:
# Import the pandas library.
import pandas as pd
from scipy.stats import mode
import datetime as dt
from datetime import datetime
from datetime import timedelta
import time
from datetime import date
from collections import Counter
from collections import OrderedDict
import warnings
warnings.filterwarnings(action='ignore')
#plotting
import numpy as np
import matplotlib.pyplot as plt
%pylab inline
#plot
import plotly
plotly.tools.set_credentials_file(username=plotly_username, 
                                  api_key=plotly_api_key)
import plotly.plotly as py
import plotly.graph_objs as go
#display texts
from IPython.display import display, Markdown, Latex
from IPython.display import Javascript
#widgets
import ipywidgets as widgets
from ipywidgets import  Layout
#get images
import urllib
import xmltodict

Populating the interactive namespace from numpy and matplotlib


In [3]:
#get logo
url = "https://www.last.fm/user/{}/listening-report/week".format(username)
logo = "![logo](./image/last.png)"
display(Markdown("[{}]({} 'Last.fm weekly report')".format(logo,url)))

[![logo](./image/last.png)](https://www.last.fm/user/ongmk/listening-report/week 'Last.fm weekly report')

In [4]:
#colours
color1 = "rgb(0,51,153)"
color2 = "rgb(102,204,255)"
transparent = "rgba(0,0,0,0)"

In [5]:
#read data from csv
scrobbles_df = pd.read_csv(username+".csv", header=None, dtype=str)
scrobbles_df.columns = ["week", "time", "artist", "artist_mbid", "track", 
                     "track_mbid", "album", "album_mbid", "tags", 
                     "album art", "timestamp"]
#make sure the scrobbles are in the right order
scrobbles_df = scrobbles_df.sort_values(by=['timestamp'],ascending=False) 

In [6]:
#widgets
last_week = datetime.date.today() - timedelta(days=7)
end_date = widgets.DatePicker(description="End date", value=datetime.date.today(),
           layout=Layout(width='90%', height='30px'))
start_date = widgets.DatePicker(description="Start date", value=last_week,
           layout=end_date.layout)
def run_all(ev):
    display(Javascript('IPython.notebook.execute_cell_range(IPython.notebook.get_selected_index()+1, IPython.notebook.ncells())'))
button = widgets.Button(description="Enter",layout = end_date.layout, 
                        disabled = False)
button.on_click(run_all)
ui =widgets.HBox([widgets.VBox([start_date]), widgets.VBox([end_date,button])])
def f(a,b):
    global end_date, length
    start_date = a
    end_date = b
    del_days = end_date - start_date
    if del_days.days <0:
        button.disabled = True
    else:
        button.disabled = False
    length = del_days.days+1
    
out = widgets.interactive_output(f, {'a': start_date, 'b': end_date})

display(ui, out)

A Jupyter Widget

A Jupyter Widget

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

# Last.fm summary

In [66]:
#period is the number of days to visualise, default=7
#previous period is the last period with the same length of time
def get_week_strings(input_date=datetime.date.today(), period = 7):
    time_difference = datetime.datetime.now()-datetime.datetime.utcnow()
    end_date_at_00 = (dt.datetime.combine(input_date, 
                  dt.datetime.min.time())+timedelta(days=1))
    end_timestamp = time.mktime(end_date_at_00.timetuple())
    start_datetime = end_date_at_00 - timedelta(days=period)
    start_timestamp = time.mktime(start_datetime.timetuple())
    _end_datetime = start_datetime
    _end_timestamp = time.mktime(_end_datetime.timetuple())
    _start_datetime = _end_datetime - timedelta(days=period)
    _start_timestamp = time.mktime(_start_datetime.timetuple())
    start_datetime = datetime.datetime.fromtimestamp(
        int(start_timestamp)).strftime('%Y-%m-%d')
    print 'from {} to {}'.format(start_datetime,
                (end_date_at_00-timedelta(days=1)).strftime('%Y-%m-%d'))
    return [start_timestamp,end_timestamp], [_start_timestamp,_end_timestamp]
# now = datetime.date(2018,4, 1)
period,previous_period = get_week_strings(end_date,length)
print period, previous_period

from 2018-01-05 to 2018-01-07
[1515110400.0, 1515369600.0] [1514851200.0, 1515110400.0]


In [67]:
#filter dataframe to last week
temp_df = scrobbles_df.loc[(period[0] <= scrobbles_df["timestamp"].astype(int))]
week_df = temp_df.loc[(temp_df["timestamp"].astype(int) < period[1])]
temp_df = scrobbles_df.loc[(previous_period[0] <= scrobbles_df["timestamp"].astype(int))]
week_before_df = temp_df.loc[(temp_df["timestamp"].astype(int) < previous_period[1])]

first_timestamp = scrobbles_df["timestamp"].min()
last_timestamp = scrobbles_df["timestamp"].max()
start = datetime.datetime.fromtimestamp(int(first_timestamp)).strftime('%Y-%m-%d')
end = datetime.datetime.fromtimestamp(int(last_timestamp)).strftime('%Y-%m-%d')
if week_df["timestamp"].count() == 0:
    display(Markdown("<span style='color:red;font-size:2em;'>Date out of range. Enter dates between {} and {}.</span>".format(start,end)))

In [68]:
#get data per day last week and last last week
def get_scrobble_per_day(week_df):
    day_dict = {}
    days = ["Mon","Tue","Wed","Thu","Fri","Sat","Sun"]
    hour_dict = {}
    for day in days:
        day_dict[day] = 0
    for index, row in week_df.iterrows():
        datetime_object = datetime.datetime.fromtimestamp(int(row["timestamp"]))
        day_int = datetime_object.weekday()
        day_str = days[day_int]
        day_dict[day_str] += 1
    keyorder = ["Fri","Sat","Sun","Mon","Tue","Wed","Thu"]
    day_dict = OrderedDict(sorted(day_dict.items(), 
                                  key=lambda i:keyorder.index(i[0])))
    return day_dict

## Scrobbles each day

In [69]:
#
def plot_weekday_chart(week_df,week_before_df):
    week_before_dict = get_scrobble_per_day(week_before_df)
    week_dict = get_scrobble_per_day(week_df)
    days = ["Fri","Sat","Sun","Mon","Tue","Wed","Thu"]
    trace2 = go.Bar(x=days, y=list(week_before_dict.values()), name='Previous period',
                    marker=dict(color=color2))
    trace1 = go.Bar(x=days, y=list(week_dict.values()),
                    name='This period',marker=dict(color=color1))
    data = [trace2,trace1]
    layout = go.Layout(barmode='group',width=800,
                       height=300, margin=go.Margin(l=100, r=0, b=50,
                                                    t=0, pad=4),
                      yaxis=dict(title='Scrobbles',showticklabels=True))
    fig = go.Figure(data=data, layout=layout)
    return py.iplot(fig, filename='WeekdayChart')
plot_weekday_chart(week_df,week_before_df)

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~askdfiuuhiuvbidvbksdhbfv/0 or inside your plot.ly account where it is named 'WeekdayChart'


## Tops:

In [70]:
#listening time
def get_listening_time(week_df):
    total_time = 0
    track_list = []
    stop_timer = True
    start_timestamp = 0
    stop_count = 0
    first_index = week_df.index[0]
    for index, row in week_df.iterrows():
        timestamp = int(row["timestamp"])
        if stop_timer:
            start_timestamp = timestamp
        try:
            next_timestamp = int(week_df.iloc[index-first_index+1]["timestamp"])
        except:
            next_timestamp = 99999999999
        if timestamp - next_timestamp <=300:
            stop_timer = False
        else:
            total_time += start_timestamp - timestamp + 180  
            #180s is the average length of a song, this is added
            #to account for the discontinued time data
            stop_timer = True
    hour = total_time/3600
    day = int(hour/24)
    hour = hour-24*day
    display(Markdown("### You had"))
    total_scrobbles = week_df['timestamp'].count()
    display(Markdown("# *_{}_* scrobbles".format(str(total_scrobbles))))
    display(Markdown("### Or "))
    if hour == 0:
        display(Markdown("# **{}** hours".format(day,hour)))
    else:
        display(Markdown("# **{}** day, **{}** hours".format(day,hour)))
    display(Markdown("### of awesomeness!!"))
get_listening_time(week_df)

### You had

# *_542_* scrobbles

### Or 

# **1** day, **7** hours

### of awesomeness!!

## Listening clock

In [71]:
#
def get_hour_dict(week_df):
    hours = range(0, 24)
    hour_dict = {}
    for hour in hours:
        hour_dict[hour] = 0
    for index, row in week_df.iterrows():
        datetime_object = dt.datetime.strptime(row["time"], '%d %b %Y, %H:%M')
        hour = datetime_object.hour
        hour_dict[hour] += 1
    return hour_dict

In [72]:
#
def plot_listening_clock(week_df):
    hour_dict = get_hour_dict(week_df)
    theta = []
    counts = []
    data = []
    hours_strings = []
    for hour, count in hour_dict.items():
        theta.append(hour*360/24)
        counts.append(count)
        hours_strings.append(hour)
    i = 0
    max_count = max(counts)
    for item in theta:
        angle = [0,item, item+ +180/12,0]
        r = [0, counts[i],counts[i],0]
        background = go.Scatterpolar(r = [0,max_count,max_count], theta = angle,
            mode = 'lines', fill = 'toself', fillcolor = transparent, 
                   line =  dict(color = transparent),
                   name = str(counts[i])+" scrobbles")
        data.append(background)
        temp_data = go.Scatterpolar(r = r, theta = angle,
            mode = 'lines', fill = 'toself', fillcolor = color1,
            line =  dict(color = "white"),name = str(counts[i])+" scrobbles")
        data.append(temp_data)
        i+=1
    layout = go.Layout(showlegend = False,  
          polar = dict(radialaxis = dict(tickfont = dict(size = 8),
                  showgrid=False,visible=False), 
                  angularaxis = dict(tickfont = dict(size = 14),
                  tickmode="array",tickvals = theta,ticktext=hours_strings,
                  rotation = 90, direction = "clockwise",showline=False)),
                  width=400, height=300, 
                  margin=go.Margin(l=25, r=100, b=25, t=25, pad=4))
    fig = go.Figure(data=data, layout=layout)
    return py.iplot(fig, filename = 'Listening clock')
plot_listening_clock(week_df)

## Discoveries

In [73]:
def get_artist_image(artist_name):
    url_str = ("http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist="+
               artist_name+"&api_key="+last_api_key)
    try:
        page = urllib.urlopen(url_str)
        parsed_page = xmltodict.parse(page)
        images = parsed_page['lfm']['artist']['image']
        for size in ["mega","extralarge","large"]:
            for image in images:
                if image["@size"]==size:
                    image_url = image["#text"]
                    return image_url
        return 0
    except:
        return 0

In [74]:
def get_track_album_art(artist_name, track_name):
    url_str = ("http://ws.audioscrobbler.com/2.0/?method=track.getInfo&artist="+
                    artist_name+"&api_key="+last_api_key+"&track="+track_name)
    try:
        page = urllib.urlopen(url_str)
        parsed_page = xmltodict.parse(page)
        images = parsed_page['lfm']['track']['album']['image']
        for size in ["extralarge","large","medium"]:
            for image in images:
                if image["@size"]==size:
                    image_url = image["#text"]
                    return image_url
        return 0
    except:
        return 0

In [75]:
def get_album_art(artist_name, album_name):
    url_str = ("http://ws.audioscrobbler.com/2.0/?method=album.getInfo&artist="+
                    artist_name+"&api_key="+last_api_key+"&album="+album_name)
    try:
        page = urllib.urlopen(url_str)
        parsed_page = xmltodict.parse(page)
        images = parsed_page['lfm']['album']['image']
        for size in ["mega","extralarge","large"]:
            for image in images:
                if image["@size"]=="mega":
                    image_url = image["#text"]
                    return image_url
        return 0
    except Exception,e:
        return 0

In [76]:
def discovery_formatter(image_url,str1,str2=""):
    display(Markdown("""<div>
                        <p style="float: left;">
                            <img src={} height="70px" width="70px" style="margin: 0px 20px 0px 0px"></p>
                        <p><span>{}</span></p>
                        <p><span>{}</span></p>
                    </div>""".format(image_url,str1,str2)))

In [77]:
#
def get_new_items(week_df,overall_df):
    items = ["artist", "track","album"]
    top_dict = {}
    for item in items:
        tot_items = len(week_df[item].unique())
        first_scrobble_ts = week_df['timestamp'].iloc[-1]
        old_df = overall_df.loc[(scrobbles_df["timestamp"] < first_scrobble_ts)]
        new_items = []
        new_items_dict = {}
        for index, row in week_df.iterrows():
            item_on_row = row[item]
            if item_on_row == "" or item_on_row =="nan":
                continue
            elif any(old_df[item] == item_on_row):
                continue
            else:
                try:
                    if unicode(item_on_row,"utf-8") not in new_items:
                        item_on_row = unicode(item_on_row,"utf-8")
                        new_items.append(item_on_row)
                        new_items_dict[item_on_row] = 1
                    else:
                        new_items_dict[item_on_row] +=1
                except:
                    1
        top_key = max(new_items_dict, key=lambda k: new_items_dict[k])
        top_count = new_items_dict[top_key]
        top_dict[item] = top_key
        num_new_items = len(new_items)
        percentage = num_new_items*100/tot_items
        str1 = "**{}** new {}s were discovered. ({}%)".format(num_new_items,item,percentage)
        str2 = "**"+top_key.encode('ascii', 'xmlcharrefreplace')+"** was your top {} discovery. ({})<br /><br />".format(
            item,top_count)
        if item == "artist":
            url = get_artist_image(top_key.encode('utf-8'))
        elif item == "track":
            temp_df = week_df[week_df["track"]==top_key]
            artist_name = temp_df.iloc[0]["artist"]
            url = get_track_album_art(artist_name,top_key.encode('utf-8'))
        elif item == "album":
            temp_df = week_df[week_df["album"]==top_key]
            artist_name = temp_df.iloc[0]["artist"]
            url = get_album_art(artist_name,top_key.encode('utf-8'))
        discovery_formatter(url,str1,str2)
    return top_dict
top_dict = get_new_items(week_df,scrobbles_df)

<div>
                        <p style="float: left;">
                            <img src=https://lastfm-img2.akamaized.net/i/u/300x300/047a20990f8ffd0442352e67c4e0bcdc.png height="70px" width="70px" style="margin: 0px 20px 0px 0px"></p>
                        <p><span>**247** new artists were discovered. (100%)</span></p>
                        <p><span>**The Beatles** was your top artist discovery. (22)<br /><br /></span></p>
                    </div>

<div>
                        <p style="float: left;">
                            <img src=https://lastfm-img2.akamaized.net/i/u/300x300/f0a44aac5ba02d6e7b71d860bec0a91d.png height="70px" width="70px" style="margin: 0px 20px 0px 0px"></p>
                        <p><span>**451** new tracks were discovered. (100%)</span></p>
                        <p><span>**All Falls Down** was your top track discovery. (5)<br /><br /></span></p>
                    </div>

<div>
                        <p style="float: left;">
                            <img src=0 height="70px" width="70px" style="margin: 0px 20px 0px 0px"></p>
                        <p><span>**369** new albums were discovered. (99%)</span></p>
                        <p><span>**Doo-Wops & Hooligans** was your top album discovery. (8)<br /><br /></span></p>
                    </div>

In [78]:
#
display(Markdown("## Learn more about: " + top_dict["artist"]))

## Learn more about: The Beatles

In [79]:
def learn_more_formatter(image_url,str1):
    display(Markdown("""<div>
                        <p style="float: left;">
                            <img src={} height="200" width="200px" style="margin: 0px 20px 20px 0px"></p>
                        <p><span>{}</span></p>
                    </div>""".format(image_url,str1)))

In [80]:
#
try:
    url_str = ("http://ws.audioscrobbler.com/2.0/?method=artist.getinfo&artist="+
                    top_dict["artist"].encode('ascii', 'xmlcharrefreplace')+"&api_key="+last_api_key)
    page = urllib.urlopen(url_str)
    parsed_page = xmltodict.parse(page)
    bio = parsed_page['lfm']['artist']['bio']
    summary = bio["summary"]
except:
    summary = "No summary found."
url = get_artist_image(top_dict["artist"])
learn_more_formatter(url, summary)

<div>
                        <p style="float: left;">
                            <img src=https://lastfm-img2.akamaized.net/i/u/300x300/047a20990f8ffd0442352e67c4e0bcdc.png height="200" width="200px" style="margin: 0px 20px 20px 0px"></p>
                        <p><span>The Beatles were an iconic rock group from Liverpool, England. They are frequently cited as the most commercially successful and critically acclaimed band in modern history, with innovative music, a cultural impact that helped define the 1960s and an enormous influence on music that is still felt today. Currently, The Beatles are one of the two musical acts to sell more than 1 billion records, with only Elvis Presley having been able to achieve the same feat. <a href="https://www.last.fm/music/The+Beatles">Read more on Last.fm</a></span></p>
                    </div>

## Top tracks:

In [81]:
#
top_tracks = week_df.groupby(['track'])['track'].agg(
    {"count": len}).sort_values(
    "count", ascending=False).head(20).reset_index()
for index,track in top_tracks.iloc[:5].iterrows():
    display(Markdown("**"+unicode(track['track'],"utf-8")+"** ({})".format(track['count'])))

**Apologize** (5)

**All Falls Down** (5)

**Your Song** (4)

**More Than You Know** (4)

**Only One - Radio Edit** (4)

In [82]:
#
for index,track in top_tracks.iterrows():
    track_name = track["track"]
    temp_df = week_df[week_df["track"]==track_name]
    artist_name = temp_df.iloc[0]["artist"]
    url = get_track_album_art(artist_name,track_name)
    if url==0:
        continue
    else:
        display(Markdown("![]("+url+")"+track_name))
        break

![](https://lastfm-img2.akamaized.net/i/u/300x300/3df8c827345740e9b9a0559cbf27da16.png)Apologize

## Mainstream meter

In [83]:
#
count = 0
i=0
for index,track in top_tracks.iterrows():
    try:
        track_name = track["track"]
        temp_df = week_df[week_df["track"]==track_name]
        artist_name = temp_df.iloc[0]["artist"]
        url_str = ("http://ws.audioscrobbler.com/2.0/?method=track.getinfo&artist="+
                    artist_name+"&api_key="+last_api_key+"&track="+track_name)
        page = urllib.urlopen(url_str)
        parsed_page = xmltodict.parse(page)
        listener = parsed_page['lfm']['track']['playcount']
        count += int(listener)
        if i == 10:
            break
        i+=1
    except:1
try:
    avg_count = count/i
except:
    print "Mainstream score calculation failed."
    avg_count =0
a = 0.000358
b = 5.3366
# a y = ax^b curve fit is used, where a and b are the constants
mainstream_score = (avg_count/a)**(1/b)

In [84]:
#
scale = ['What are you even listening to?', 'You have a special taste in music.',
          'Not so mainstream.', 'Mainstream-ish.', 'Moderately mainstream.',
          'Pure mainstream. Wow.']
c= ['hsl('+str(h)+',50%'+',50%)' for h in np.linspace(0, 360, 101)]
trace = [go.Scatter(x=[0.6], y=[mainstream_score], hoverinfo='y',
                    showlegend=False,marker={'color': transparent})]
shapes = []
shapes.append({'type': 'rect','x0': 0.1, 'x1': 1.1,'y0': 0, 'y1': mainstream_score,
              'fillcolor': c[int(mainstream_score)]})
shapes.append({'type': 'rect','x0': 0.1, 'x1': 1.1,'y0': 0, 'y1': 100})
for i in range(1,5):
    line = {'type': "line",'x0': 0.15,'y0': 20*i,'x1': 1.1,'y1': 20*i,
           'opacity':0.2}
    shapes.append(line)
xaxes ={'range':[0, 4], 'showgrid': False, 'showline': False,
         'zeroline': False, 'showticklabels': False}
yaxes = {'range':[-5,105], 'showgrid': False,
         'showline': False, 'zeroline': False,
         'ticktext':scale, 'tickvals':[0., 20.,40.,60.,80.,100.]}
layout = {'shapes': shapes,'xaxis': xaxes,'yaxis': yaxes,
          'autosize': False,'width': 350, 'height': 400,
          "margin" : go.Margin(l=200, r=50, b=0, t=25, pad=4)}
fig = dict(data=trace, layout=layout)
py.iplot(fig, filename='Mainstream meter')

## Top artists:

In [85]:
#
top_artists = week_df.groupby(['artist'])['artist'].agg(
    {"count": len}).sort_values(
    "count", ascending=False).head(10).reset_index()
for index,artist in top_artists.iloc[:5].iterrows():
    display(Markdown("**"+unicode(artist['artist'],"utf-8")+"** ({})".format(artist["count"])))

**The Beatles** (22)

**Imagine Dragons** (19)

**Bruno Mars** (17)

**OneRepublic** (17)

**The Chainsmokers** (13)

In [86]:
#
for index,artist in top_artists.iterrows():
    artist_name = artist["artist"]
    url = get_artist_image(artist_name)
    if url==0:
        continue
    else:
        display(Markdown("![]("+url+")"+artist_name))
        break

![](https://lastfm-img2.akamaized.net/i/u/300x300/047a20990f8ffd0442352e67c4e0bcdc.png)The Beatles

## Top artists chart

In [94]:
#
counts_dict = {}
count_dict = {}
for index,artist in top_artists.iterrows():
    counts_dict[artist['artist']] = []
    count_dict[artist['artist']] = 0
times = []
reverse_df = week_df.sort_values(by=['timestamp'],ascending=True)
for index, row in reverse_df.iterrows():
    artist_name = row["artist"]
    timestamp_int = int(row["timestamp"])
    times.append(datetime.datetime.fromtimestamp(timestamp_int))
    for index, artist in top_artists.iterrows():
        if row["artist"] ==artist["artist"]:
            count = count_dict[artist['artist']] +1
        else:
            count = count_dict[artist['artist']]
        count_dict[artist['artist']] = count
        counts_dict[artist['artist']].append(count)
data = []
for index,artist in top_artists.iterrows():
    counts = counts_dict[artist['artist']]
    trace = go.Scatter(x = times, y = counts,name = artist['artist'],
                       hoverinfo="name+y")
    data.append(trace)
layout = go.Layout(width=800, height=400, 
                   margin=go.Margin(l=100, r=100, b=100, t=25, pad=4),
                   yaxis=dict(title='Scrobbles',showticklabels=True))
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='Top Artists Graph')

## Top tags:

In [88]:
#
def get_top_tags(input_df):
    tag_list = []
    for index, row in input_df.iterrows():
        tags_string = row["tags"]
        try:
            tags = [tag.strip() for tag in tags_string.split(',')]
            tags = [x for x in tags if x != ""]
            for tag in tags:
                tag_list.append(tag)
        except Exception,e:
            1
    top_tags = Counter(tag_list).most_common()[:10]
    return top_tags
top_tags = get_top_tags(week_df)
top_five = top_tags[:5]
for tag in top_five:
    display(Markdown("**"+tag[0]+"** ({})".format(tag[1])))

**pop** (187)

**electronic** (58)

**dance** (47)

**alternative** (46)

**indie** (46)

## Top tags chart

In [95]:
#
counts_dict = {}
count_dict = {}
for tag in top_tags:
    counts_dict[tag[0]] = []
    count_dict[tag[0]] = 0
times = []

reverse_df = week_df.sort_values(by=['timestamp'],ascending=True)
for index, row in reverse_df.iterrows():
    tags_string = row["tags"]
    try:
        timestamp_int = int(row["timestamp"])
        times.append(datetime.datetime.fromtimestamp(timestamp_int))
        tags = [s.strip() for s in tags_string.split(',')]
        tags = [x for x in tags if x != ""]
        for item in tags:
            for tag in top_tags:
                if tag[0] ==item:
                    count = count_dict[tag[0]] +1
                else:
                    count = count_dict[tag[0]]
                count_dict[tag[0]] = count
                counts_dict[tag[0]].append(count)
    except:
        1
data2 = []
for tag in top_tags:
    counts = counts_dict[tag[0]]
    trace = go.Scatter(x = times, y = counts,name=tag[0],
                       hoverinfo="name+y")
    data2.append(trace)
layout = go.Layout(width=800, height=400, 
                   margin=go.Margin(l=100, r=100, b=100, t=25, pad=4),
                   yaxis=dict(title='Scrobbles',showticklabels=True))
fig = go.Figure(data=data2, layout=layout)
py.iplot(fig, filename='Top Tags Graph')

## Top albums

In [90]:
#
top_albums = week_df.groupby(['album'])['album'].agg(
    {"count": len}).sort_values(
    "count", ascending=False).head(10).reset_index()
for index,album in top_albums.iloc[:5].iterrows():
    display(Markdown("**"+unicode(album['album'],"utf-8")+"** ({})".format(album["count"])))

**÷ (Deluxe)** (9)

**Doo-Wops & Hooligans** (8)

**AVĪCI (01)** (7)

**Smoke + Mirrors (Deluxe)** (5)

**Waking Up** (5)

In [91]:
def f(x,rank,d):
    n_max=len(rank)
    x_max=(2*n_max-1)*d
    if x<0:
        return None
    elif x_max<=x<=x_max+8:
        if rank[-1] == 0:
            return None
        else:
            return rank[-1]
    elif x>=x_max+8:
        return None
    else:
        n=int(x/(2*d))
        x1= x%(2*d)
        if x1<d:
            if rank[n]==0:
                return None
            else:
                return rank[n]
        else:
            L=rank[n+1]-rank[n]
            x0=n*2*d+1.5*d
            y0=(rank[n+1]+rank[n])*0.5
            if rank[n]==0:
                return None
            elif rank[n+1]==0:
                return None
            else:
                f=(2*((1+exp(-3*(x-x0)))**-1)-1)*0.5*L+y0
                return f
            
def plotter(title, rank):
    x = np.linspace(-1,80,200)
    y=[]
    i = 0
    dots = []
    for number in x:
        y.append(f(number,rank,4))
#         if y[i-1] == None and y[i] != None:
#             trace = go.Scatter(x = [number], y = [y[i]],marker={'size': 15},showlegend=False,hoverinfo="none")
#             dots.append(trace)
#         i+=1
    trace1 = [go.Scatter(x = x, y = y,hoverinfo="name",name=title, line = dict(width = 7,color = trace.line.color))]
    return dots+trace1

## Tags Subway Plot

In [92]:
#
end_ts = int(week_df["timestamp"].max())
start_ts = int(week_df["timestamp"].min())
interval = (end_ts-start_ts)*0.1
tag_dict = {}
all_ranked_tags = set([])
for i in range(10):
    lower = start_ts+i*interval
    upper = start_ts+(i+1)*interval
    interval_df = week_df.loc[(week_df["timestamp"].astype(int) <= upper)]
    top_tags = get_top_tags(interval_df)[:5]
    tags_list = []
    for item in top_tags:
        all_ranked_tags.add(item[0])
        tags_list.append(item[0])
    tag_dict[i] = tags_list
data = []
for tag in all_ranked_tags:
    tag_dict[tag] = []
    rank_list = []
    for i in range(10):
        try:rank_list.append((1+tag_dict[i].index(tag)))
        except:rank_list.append(0)
    traces = plotter(tag,rank_list)
    data+=traces

xticks = [datetime.datetime.fromtimestamp(start_ts+i*interval).strftime('%y-%m-%d') for i in range(11)]
xaxes ={'showgrid': True, 'showline': False,
         'zeroline': False, 'showticklabels': True,
         'ticktext':xticks, 'tickvals':[8*i for i in range(11)]}
yaxes = {'showgrid': False,'showline': False, 'zeroline': False,'showticklabels': False}
layout = {'xaxis': xaxes,'yaxis': yaxes, 'width':1000, "height":250, "margin":go.Margin(l=100, r=100, b=25, t=25, pad=4)}
fig = dict(data=data, layout=layout)
py.iplot(fig, filename='Tube map')