In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import spotipy
import pyen
import os
import math
import json
import urllib2
import collections
import utils
import kojak
import pickle
import pprint
import csv
import pandas as pd
import random
from pymongo import MongoClient
import string
import matplotlib.pyplot as plt
import plotly.plotly as py
from plotly.graph_objs import *
import plotly.tools as tls

home = os.environ['HOME']
keypath = home + '/projects/secure/echo_nest_api_key.password'
with open(keypath, 'r') as infile:
    echo_nest_api_key = infile.read()
    
sp = spotipy.Spotify()
en = pyen.Pyen(echo_nest_api_key)
mk = kojak.MusicGrabKojak(en_api_key_path=keypath)

client = MongoClient()
db = client.dsbc              
print db.collection_names()         
musicdb = client.dsbc.music

def view_albums(artistname):
    print """# %s Studio Albums""" % artistname
    artistid = mk.echonest_get_spotify_artist_id(artistname)
    albums = mk.spotify_get_artist_albums(artistid)
    for album in albums:
        theid = album['id']
        try:
            year = musicdb.find_one({'album_id': theid})['custom_album_year']
        except TypeError:
            continue
        name = album['name']
        print """{"idnum": "%s", "year": "%i", "name": "%s"},""" % (theid, year, name)

[u'music', u'system.indexes']


In [3]:
# Ploting functions (using plotly)
def grab_plot_data(field, bandobj, showscatter=True, doscatter=True):
    
    relevant_fields = ['custom_album_year', 'album_name', 'sp_preview_url', 'en_artist', 'en_title']
    relevant_fields.append(field)
      
    info = bandobj.df

    # Get summary statistics for year (album)
    
    means = []
    text = []
    describe = bandobj.df.groupby('custom_album_year').describe()
    years = sorted(set(info['custom_album_year'].values.tolist()))
    
    for year in years:
        # hacky way to skip over problem data point
        if field == 'album_popularity' and year == 2014 and bandobj.artistname == 'Pink Floyd':
            continue
        means.append(describe[field].ix[year].ix['mean'])
#         text.append(describe[])
        
    # Pull out individual track info
    yvals = info[field].values.tolist()
    xvals = info['custom_album_year'].values.tolist()
    xvals_scatter = xvals # Lazy coding here to turn off slight scatters in x coordinates
#     xvals_scatter = [int(value) + random.uniform(-0.12, 0.12) for value in xvals]


    # Create text data for hover
    urls = info['sp_preview_url'].values.tolist()
    tracknames = info['en_title'].values.tolist()
    albumnames = info['album_name'].values.tolist()
    text = ["%s<br>%s<br>%s" % (tracknames[n], albumnames[n], u) for n, u in enumerate(urls)]
        
    # Set up data for scatter plot and summary plot
    summary = Scatter(y=means, x=years, mode='line', name=bandobj.artistname)
    data = [summary]
    if doscatter: 
        if showscatter:
            scatter = Scatter(y=yvals, x=xvals_scatter, text=text, mode='markers', name=bandobj.artistname)
        else:
            scatter = Scatter(y=yvals, x=xvals_scatter, text=text, mode='markers', name=bandobj.artistname, visible=False)   
        data.append(scatter) 
    return data
  
def plot_data(field, bandobjs, bandlist='ALL', filename='Band Plot', **kwargs):   
    data = []
    for bandobj in bandobjs:
        if bandlist == 'ALL' or bandobj.artistname in bandlist:
            data += grab_plot_data(field, bandobj, **kwargs)
            print bandobj.artistname
             
    plot_url = py.plot(data, filename=filename)
   

In [3]:
# help(py.plot)

In [144]:
# # View albums by artist in database
# view_albums('Simple Minds')

### Set up TrackAnalysis objects for each band

In [3]:
# Make list of bands in studioalbums file
# this instance of TrackAnalysis exists solely to read in the albums file.
thewho = kojak.TrackAnalysis('The Who', musicdb)
bandlist = list(set([album['artist'] for album in thewho._read_album_file(thewho.album_file)]))
shortnames = [filter(str.isalnum, band).lower() for band in bandlist] #What to call instances of Track Analysis

# # Create and fill TrackAnalysis instance for each band in bandlist
bandobjs = []
# First, create TrackAnalysis instance for each band
for n, band in enumerate(bandlist):
    sn = shortnames[n]
    exec_string1 = sn + """ = kojak.TrackAnalysis('""" + band + """', musicdb)"""
    exec_string2 = "bandobjs.append(" + sn + ")" 
    exec exec_string1
    exec exec_string2
# Second, fill each TrackAnalysis instance for each band, and 
# group by album
for n, band in enumerate(bandobjs):
    band.create_dataframe()
    band.albumgroup = band.df.groupby('album_name')
    print "%i\t%s\t%s" %(n, band.artistname, shortnames[n])

0	Chicago	chicago
1	Van Halen	vanhalen
2	Cheap Trick	cheaptrick
3	Red Hot Chili Peppers	redhotchilipeppers
4	U2	u2
5	Genesis	genesis
6	Motorhead	motorhead
7	The Cure	thecure
8	Golden Earring	goldenearring
9	Aerosmith	aerosmith
10	Journey	journey
11	Kiss	kiss
12	Fleetwood Mac	fleetwoodmac
13	ZZ Top	zztop
14	Metallica	metallica
15	Allman Brothers	allmanbrothers
16	Rush	rush
17	Slayer	slayer
18	The Who	thewho
19	Tom Petty and the Heartbreakers	tompettyandtheheartbreakers
20	The Eagles	theeagles
21	Judas Priest	judaspriest
22	The Beach Boys	thebeachboys
23	Iron Maiden	ironmaiden
24	Jethro Tull	jethrotull
25	Simple Minds	simpleminds
26	The Rolling Stones	therollingstones
27	Queen	queen
28	Pink Floyd	pinkfloyd
29	Yes	yes


In [46]:
# sorted(bandlist)

### And Plot It

In [4]:
keepmybands = ['Cheap Trick', 'Genesis', 'Motorhead', 'The Cure', 'Golden Earring', 'Metallica',
                  'Slayer', 'Tom Petty and the Heartbreakers', 'Judas Priest', 'Jethro Tull', 'Simple Minds'
                  ]
bands60s = ['The Who', 'The Rolling Stones', 'Allman Brothers', 'Pink Floyd',
            'The Beach Boys', 'YES', 'Jethro Tull', 'Fleetwood Mac', 'Genesis']
# bands70s
bands80s = ['Slayer', 'U2', 'Iron Maiden', 'Red Hot Chili Peppers']
# keepmainbands = keepmybands += []
t = plot_data('album_popularity', bandobjs, bandlist=bands80s, doscatter=False, filename='80s Bands Popularity')

Red Hot Chili Peppers
U2
Slayer
Iron Maiden


In [12]:
tempo_bands = ['Genesis', 'Chicago']
t = plot_data('en_audio_summary_tempo', bandobjs, bandlist=tempo_bands, filename='Band Tempos 2')

Chicago
Genesis


In [41]:
print t

None


In [6]:
# thewho.df.head()
thewho.df['album_name'].value_counts()

My Generation (Deluxe Edition)       30
The Who Sell Out (Remastered)        23
Tommy (Remastered)                   23
Endless Wire                         21
A Quick One (Remastered)             20
Quadrophenia                         17
The Kids Are Alright (Remastered)    17
It's Hard                            16
Who's Next (Remastered)              16
Face Dances (Remastered)             14
Who Are You                          14
The Who By Numbers                   13
dtype: int64

### Testing Stuff

In [235]:
audio_summary_test = en.get('song/profile', id=songids[0], bucket='audio_summary')

In [16]:
# artist_id = mk.echonest_get_spotify_artist_id('The Who')
# basic_album_data = mk.spotify_get_artist_albums(artist_id, country='US')
# test = mk.get_all_album_songs(ids[0])

# Individual functions
# sp_album_info, sp_track_info = mk.spotify_get_album_info(ids[0])
# en_track_music_info = mk.echonest_get_all_song_info(track_info[0]['uri'])
# track_data = mk.combine_echonest_spotify_track_data(en_track_music_info, sp_track_info[0])

### Database management and creation (if it doesn't already exist)

In [15]:
bands = ['The Who', 'The Rolling Stones', 'Queen', 'Van Halen', 'Pink Floyd', 'Journey', 'Bon Jovi,'
         'Metallica', 'Aerosmith', 'Allman Brothers', 'Grateful Dead--', 'Black Sabbath--', 
         'The Eagles', 'ZZ Top', 'The Beach Boys', 'Slayer', 'Rush', 'Golden Earring', 
         'U2', 'Def Leppard', 'Motley Crue', 'Tom Petty and the Heartbreakers', 'Kiss', 
         'Neil Young Crazy Horse', 'Rush', 'Iron Maiden', 'Motorhead', 'Yes', 
         'Red Hot Chili Peppers', 'Cheap Trick', 'Judas Priest', 'Jethro Tull', 
         'Fleetwood Mac', 'Supertramp', 'Chicago', 'Genesis', 'The Cure', 'Poison', 
         'Simple Minds', 'Flaming Lips', 'Sonic Youth', 
         ]
no_music_bands = ['Springsteen E Street Band', 'REM', 'Earth Wind and Fire', 
                  'Huey Lewis and the News']

new_bands = []

In [None]:
# Insert albums given band name into database
# for band in new_bands:
for band in bands:
    print band
    album_ids = mk.get_all_album_ids(band)
    if album_ids != None:
        for album in album_ids:
            mk.get_all_album_songs(album['id'], insert_mongo=True, mongo_collection=musicdb)

The Who
Number of albums found: 30
Number of songs inserted into Mongo: 23
Number of songs inserted into Mongo: 21
Number of songs inserted into Mongo: 42
Number of songs inserted into Mongo: 31
Number of songs inserted into Mongo: 28
Number of songs inserted into Mongo: 21
Number of songs inserted into Mongo: 29
Number of songs inserted into Mongo: 34
Number of songs inserted into Mongo: 16
Number of songs inserted into Mongo: 14
Number of songs inserted into Mongo: 17
Number of songs inserted into Mongo: 14
Number of songs inserted into Mongo: 13
Number of songs inserted into Mongo: 17
Number of songs inserted into Mongo: 42
Number of songs inserted into Mongo: 28
Number of songs inserted into Mongo: 16
Number of songs inserted into Mongo: 63
Number of songs inserted into Mongo: 14
Number of songs inserted into Mongo: 32
Number of songs inserted into Mongo: 66
Number of songs inserted into Mongo: 43
Number of songs inserted into Mongo: 23
Number of songs inserted into Mongo: 23
Numbe

In [54]:
# # Add in additional fields after gathering from APIs
fullcollect  = musicdb.find({}, {"album_release_date":1, "album_artists":1}, timeout=False)
for b in fullcollect:
    album_year = int(b['album_release_date'].split('-')[0])
    sp_artist_id = b['album_artists'][0]['uri']
    musicdb.update({"_id": b["_id"]}, {"$set": {"custom_album_year": album_year}})
    musicdb.update({"_id": b["_id"]}, {"$set": {"custom_sp_artist_id": sp_artist_id}})

In [7]:
# # Index on album_index
musicdb.ensure_index("custom_sp_artist_id")
musicdb.ensure_index("custom_album_year")
musicdb.ensure_index("album_id")

u'album_id_1'

In [4]:
# musicdb.remove({})
musicdb.count()

1636

### Old Snips

In [39]:
def get_songids_by_artist(artist_name, en_api_object, verbose=True):
    """Gets Echo Nest song ids for all songs by given artist.
       Requires an instance of pyen.Pyen() from the pyen module.
    """
    ids = []
    results_per_request = 100 # 100 is Echo Nest API maximum
    
    num_requests = 1
    start = 0
    request_num = 0
    while request_num <= num_requests:
        response = en_api_object.get('artist/songs', name = artist_name, results=results_per_request, start=start)
        num_requests = response['total']/results_per_request
        for i, song in enumerate(response['songs']):
            ids.append(song['id'])
        request_num += 1
        start = start + results_per_request
        if verbose:
            print "Retrieved %i / %i song ids" % (len(ids), response['total'])
    return ids

In [270]:
## This still has box plot code in it. I've taken it out of the other version.

def plot_data(field, bandobjs):
    
    relevant_fields = ['custom_album_year', 'album_name', 'sp_preview_url', 'en_artist', 'en_title']
    relevant_fields.append(field)

#     for b in bandobjs:
    
    # Get data for each year
    
    # for loop exists so it's easier to set up boxplots, thought those were later abandoned.
    # Box plots require seperate trace object for each x value of points -- hence they must
    # be split up by year. Will require more work then uncommenting / changing indentation
    # to get it to work.
#     for year in range(1969, 1970):
    # Pull out relevant data
        # info = bandobjs.df[bandobjs.df['custom_album_year'] == year] # needed for box plots
    info = bandobjs.df # needed for scatter plots
    albumgroup = bandobjs.df.groupby('album_name')
    
    
    yvals = info[field].values.tolist()
    xvals = info['custom_album_year'].values.tolist()
    xvals_scatter = [int(value) + random.uniform(-0.25, 0.25) for value in xvals]

    # Create text data for hover
    urls = info['sp_preview_url'].values.tolist()
    tracknames = info['en_title'].values.tolist()
    albumnames = info['album_name'].values.tolist()
    text = ["%s\n%s\n%s" % (tracknames[n], albumnames[n], u) for n, u in enumerate(urls)]
        
        
        # Neeeded for box plots
        # bstring_box = "Box(y=" + str(yvals) + ", x0=" + str(year) + ", boxpoints='all', jitter=0.3, pointpos=-1.8)"
    data = [Scatter(y=yvals, x=xvals_scatter, text=text, mode='markers')]
    
#     data.append(bstring_scatter)
    # (end for loop)
    
    # Create Scatter objects
#     for n, bstr in enumerate(data):
    plot_url = py.plot(data, filename='scatter_test')
    return plot_url
    
t = plot_data('en_audio_summary_tempo', thewho)
