# NVD3 Visualizations

## Procedural vs Declarative Visualizations

**Procedural :**
- The actions to be taken are specified one after another, in the order they should run.
- Example : matplotlib.pyplot

**Declarative :**
- The relationships between the data and the visual elements are defined (declared) before the plotting starts.
- Example : D3.js

NVD3 is a library of reusable charts built on top of the D3.js library, which is used for declarative visualizations. python-nvd3 is a Python wrapper for NVD3.

## Installing python-nvd3

In [1]:
!pip install python-nvd3



In [2]:
import nvd3
import pandas as pd
import operator
# Initializing for the notebook: this is the setup call that the nvd3
# IPython extension's load message asks for.
nvd3.ipynb.initialize_javascript()

loaded nvd3 IPython extension
run nvd3.ipynb.initialize_javascript() to set up the notebook
help(nvd3.ipynb.initialize_javascript) for options


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

## Getting the Data

In [3]:
# Load the pickled song data (presumably a list of per-song dicts; the
# functions below index each entry by keys such as 'albumArtist').
# NOTE(review): pickle can execute arbitrary code while loading, so only
# unpickle 'data_list' if it comes from a trusted source.
import pickle
with open('data_list', 'rb') as f:  # context manager closes the file handle
    data = pickle.load(f)

In [4]:
# Build a DataFrame from the loaded records and discard the artwork-reference
# and identifier columns, which are not used in the analysis below.
df = pd.DataFrame(data).drop(
    [
        "albumArtRef",
        "artistArtRef",
        "albumId",
        "artistId",
        "clientId",
        "nid",
        "storeId",
    ],
    axis=1,
)

In [5]:
# Convert the duration and size columns to numeric dtypes so they can be
# summarised numerically.
for column in ("durationMillis", "estimatedSize"):
    df[column] = pd.to_numeric(df[column])

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,discNumber,durationMillis,estimatedSize,playCount,trackNumber,year
count,736.0,736.0,736.0,732.0,736.0,735.0
mean,1.013587,253180.706522,10149370.0,24.52459,5.01087,2008.209524
std,0.115847,73870.938806,2954185.0,15.408197,4.822272,11.308875
min,1.0,30000.0,1202704.0,1.0,1.0,1902.0
25%,1.0,211000.0,8459782.0,13.0,1.0,2006.0
50%,1.0,238000.0,9532630.0,21.0,3.0,2012.0
75%,1.0,284250.0,11401430.0,32.0,7.0,2015.0
max,2.0,738000.0,29525710.0,92.0,49.0,2018.0


In [7]:
def get_artist_count(data):
    """Count the songs belonging to each album artist.

    Args:
        data: Iterable of song records; each record must provide an
            'albumArtist' key.

    Returns:
        A tuple ``(album_artist, artist_count)`` of two parallel lists: the
        artist names ordered from most to fewest songs, and the matching
        song counts. Artists with equal counts keep first-seen order.
    """
    songs_per_artist = {}
    for song in data:
        name = song['albumArtist']
        songs_per_artist[name] = songs_per_artist.get(name, 0) + 1

    # The sort is stable, so ties stay in insertion order.
    ranked = sorted(
        songs_per_artist.items(), key=operator.itemgetter(1), reverse=True
    )
    album_artist = [name for name, _ in ranked]
    artist_count = [total for _, total in ranked]

    return album_artist, artist_count

In [42]:
def get_played_count(data):
    """Total the play counts of all songs for each album artist.

    Args:
        data: Iterable of song dicts. Each must provide 'albumArtist';
            'playCount' is optional and treated as 0 when absent.

    Returns:
        A tuple ``(album_artist, artist_count)`` of two parallel lists: the
        artist names ordered from most to fewest total plays, and the
        matching totals. Artists with equal totals keep first-seen order.
    """
    plays_per_artist = {}
    for each_song in data:
        # Only a missing key is defaulted; any other error now surfaces
        # instead of being hidden by a bare ``except``.
        count = each_song.get('playCount', 0)
        artist = each_song['albumArtist']
        plays_per_artist[artist] = plays_per_artist.get(artist, 0) + count

    album_artist_sorted = sorted(
        plays_per_artist.items(), key=operator.itemgetter(1), reverse=True
    )
    album_artist = [x[0] for x in album_artist_sorted]
    artist_count = [x[1] for x in album_artist_sorted]

    return album_artist, artist_count

In [52]:
def top_ten_songs(data):
    """Rank song titles by their total play count.

    Despite the name, every title is returned, not just ten; truncating the
    result is left to the caller.

    Args:
        data: Iterable of song dicts. 'title' is optional (songs without it
            are grouped under "NIL") and 'playCount' is optional (treated
            as 0 when absent).

    Returns:
        A tuple ``(song, song_cnt)`` of two parallel lists: the titles
        ordered from most to least played, and the matching play totals.
        Songs sharing a title are summed together.
    """
    song_count = {}

    for each_song in data:
        # Only missing keys are defaulted; other errors now surface instead
        # of being hidden by a bare ``except``.
        song_name = each_song.get('title', "NIL")
        song_count_curr = each_song.get('playCount', 0)
        song_count[song_name] = song_count.get(song_name, 0) + song_count_curr

    song_count_sorted = sorted(
        song_count.items(), key=operator.itemgetter(1), reverse=True
    )
    song = [x[0] for x in song_count_sorted]
    song_cnt = [x[1] for x in song_count_sorted]

    return song, song_cnt

In [64]:
def song_length(data):
    """Return the duration of every song in minutes.

    Args:
        data: Iterable of song records; each must provide a
            'durationMillis' value that ``int()`` can convert.

    Returns:
        A list with one duration in minutes per song, in input order.
    """
    millis_per_minute = 1000 * 60
    return [int(song['durationMillis']) / millis_per_minute for song in data]

## Bar Charts

In [65]:
def makebarchart(data, filename, top_n=10):
    """Render a discrete bar chart with nvd3 and save it as an HTML file.

    Args:
        data: A pair ``(xdata, ydata)`` of sliceable sequences holding the
            bar labels and the bar heights.
        filename: Output path without extension; '.html' is appended.
        top_n: Number of leading entries to plot (default 10).
    """
    xdata, ydata = data
    barchart = nvd3.discreteBarChart(name='discreteBarChart', height=800, width=1200)
    barchart.add_serie(y=ydata[:top_n], x=xdata[:top_n])
    barchart.buildhtml()
    chart_html = barchart.htmlcontent
    # Write UTF-8 explicitly so labels with non-ASCII characters (artist or
    # song names) do not depend on the platform's default encoding.
    with open(filename + '.html', 'w', encoding='utf-8') as f:
        f.write(chart_html)


In [66]:
# Bar chart of the artists with the most songs, written to albumartist.html.
makebarchart(get_artist_count(data),'albumartist')

In [67]:
# Bar chart of the artists with the highest total play counts, written to artistplayed.html.
makebarchart(get_played_count(data),'artistplayed')

In [68]:
# Bar chart of the most-played song titles, written to top10songs.html.
makebarchart(top_ten_songs(data),'top10songs')


In [71]:
# Compute the durations once (instead of twice) and plot them against each
# song's position in `data`; the chart is written to songlength.html.
durations = song_length(data)
makebarchart((range(len(durations)), durations), 'songlength')