## Festival Recommendation 

### Contents

- Imports and Options
- Formulas
- Read and Clean Scrapy Data
- Spotify API calls

#### Imports and Options

In [1]:
import pandas as pd
from datetime import datetime, timedelta
from collections import defaultdict
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import pickle
import time
from sklearn import preprocessing
from sklearn.cluster import KMeans
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
import config
from copy import deepcopy

%matplotlib inline

pd.options.display.float_format = '{:20,.5f}'.format
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

#### Formulas

In [2]:
def date_split(dates, position=0):
    ''' Splits festival dates and returns start date(position = 0) or end date(position = 1)'''
    dates = dates.split('-')
    if position:
        end_date = dates[1]
        end_date = datetime.strptime(end_date, '%B %d, %Y')
        return end_date
    else:
        start_date = dates[0] + ' ' + dates[1][-4:]
        start_date = datetime.strptime(start_date, '%B %d %Y')
        return start_date
    

def make_lower(lineup):
    ''' Takes artist lineup and returns list with names lowcase and whitespace stripped'''
    lineup_new = []
    for artist in lineup:
        artist = artist.strip()
        artist = artist.lower()
        lineup_new.append(artist)
    return lineup_new


def replace_genre(genre):
    genre = genre.replace(' ', '_')
    genre = genre.replace('-', '_')
    new_genre = replace_dict[genre]
    return new_genre
        

def get_genres(lineup):
    '''Takes artists in lineup and returns list of genres'''
    genre_list =[]
    for artist in lineup:
        try:
            genres = artist_info[artist]['genres']
            artist_genres =[]
            for genre in genres:
                new_genre = replace_genre(genre)
                if new_genre not in artist_genres and new_genre != '':
                    artist_genres.append(new_genre)
            if artist_genres:
                genre_list.append(artist_genres)
        except KeyError:
            next
    return genre_list
    
    
def genre_dict(lineup):
    '''Takes artist lineup and returns genre counts'''
    artist_genres = defaultdict(int)
    for artist in lineup:
        try:
            genres = artist_info[artist]['genres']
            for genre in genres:
                artist_genres[genre] += 1
        except KeyError:
            next
    return artist_genres


def no_spotify_info(lineup):
    '''Takes artist lineup and returns artist not in Spotify database'''
    unknown = []
    for artist in lineup:
        try:
            genres = artist_info[artist]['genres']
        except KeyError:
            unknown.append(artist)
    return unknown


def top_three(genre_list):
    if len(genre_list) < 4:
        return genre_list
    else:
        most_pop = []     
        for genre in genre_list:
            most_pop.append((all_genres[genre], genre))
        most_pop.sort(reverse=True)
        selection = []
        for i in range(0, 3):
            top_genre = most_pop[i][1]
            selection.append(top_genre)
        return selection

def make_genre_counts(genre_lists):
    lineup_genres = defaultdict(int)
    for genre_list in genre_lists:
        if genre_list:
            genre_list = top_three(genre_list)
            for genre in genre_list:
                lineup_genres[genre] += 1
    return lineup_genres


def user_genres(artists):
    genres_dict = defaultdict(int)
    for artist in artists:
        try:
            genre_result = spotify.search(q='artist:' + artist, type='artist')['artists']['items'][0]['genres']
            genre_list = []
            for genre in genre_result:
                new_genre = replace_genre(genre)
                if new_genre not in genre_list:
                    genre_list.append(new_genre)
            genre_list = top_three(genre_list)
            for genre in genre_list:
                genres_dict[genre] += 1
        except IndexError:
            next
    df = pd.DataFrame(genres_dict, index=["User",])
    df_norm = pd.DataFrame(preprocessing.normalize(df, norm='l1'), index=["User",], columns=list(df.columns))
    return df_norm


def add_weights(x):
    fest_name = x[1]
    lineup = list(festivals[festivals.name == fest_name]['lineup'])[0]
    count = 0
    for artist in user_artists:
        if artist in lineup:
            count += 1
    new_dist = x[0] + (count * 0.15)
    return new_dist


def artist_genre_replace(g_list):
    genre_string = ''
    if g_list:
        for i in range(0, len(g_list)):
            g_list[i] = replace_genre(g_list[i])
        new_genres = list(set(g_list))
        genre_string = ",".join(new_genres)
    return genre_string
            
    
def track_averages(artist):
    averages = defaultdict(int)
    if artist in artist_average:
        return artist_average[artist]
    else:
        count = 0
        danceability = 0
        energy = 0
        key = 0
        loudness = 0
        speechiness = 0
        acousticness = 0
        instrumentalness = 0
        liveness = 0
        valence = 0
        tempo = 0
        try:
            artist_id = spotify.search(q='artist:' + artist, type='artist')['artists']['items'][0]['uri']
            top_tracks = spotify.artist_top_tracks(artist_id)
            track_features = []
            for i in range(0, len(top_tracks['tracks'])):
                song_id = str(top_tracks['tracks'][i]['uri'])
                features = spotify.audio_features(song_id)
                track_features.append(features)
            for track in track_features:
                count += 1.0
                danceability += track[0]['danceability']
                energy += track[0]['energy']
                key += track[0]['key']
                loudness += track[0]['loudness']
                speechiness += track[0]['speechiness']
                acousticness += track[0]['acousticness']
                instrumentalness += track[0]['instrumentalness']
                liveness += track[0]['liveness']
                valence += track[0]['valence']
                tempo += track[0]['tempo']
        except TypeError:
                next
        if count == 0:
            count = 1
        averages['danceability'] = danceability / count
        averages['energy'] = energy / count
        averages['key'] = key / count
        averages['loudness'] = loudness / count
        averages['speechiness'] = speechiness / count
        averages['acousticness'] = acousticness / count
        averages['instrumentalness'] = instrumentalness / count
        averages['liveness'] = liveness / count
        averages['valence'] = valence / count
        averages['tempo'] = tempo / count
        try:
            averages['genres'] = spotify.search(q='artist:' + artist, type='artist')['artists']['items'][0]['genres']
        except IndexError:
            averages['genres'] = []
        return averages
    
    
def get_image(x):
    try:
        return artist_info[x]['images'][0]['url']
    except IndexError:
        return 'http://www2.pictures.zimbio.com/mp/RyOQVmpiyZB+O937YJarJVm+594x400.jpg'
    except KeyError:
        return 'http://www2.pictures.zimbio.com/mp/RyOQVmpiyZB+O937YJarJVm+594x400.jpg'

#### Read and Clean Scrapy Data

In [5]:
# Read in festival data
all_festivals = pd.read_json("pretty_festivals.json")

In [6]:
# Formatt columns
all_festivals['start_date'] = all_festivals.dates.apply(date_split)
all_festivals['end_date'] = all_festivals.dates.apply(lambda x: date_split(x, 1))
all_festivals['duration'] = all_festivals.end_date - all_festivals.start_date + timedelta(days=1)
all_festivals['lineup'] = all_festivals.lineup.apply(make_lower)



# Reorder columns and sort rows by date 
column_order = [
    'name', 
    'start_date', 
    'end_date', 
    'location', 
    'website', 
    'lineup', 
    'image'
]
all_festivals = all_festivals[column_order]
all_festivals.sort_values('start_date', inplace=True)
all_festivals = all_festivals.reset_index(drop=True)

# Remove festivals without lineups
have_lineup = []

for fest_lineup in all_festivals.lineup:
    if len(fest_lineup) >= 10:
        have_lineup.append(True)
    else:
        have_lineup.append(False)
        
festivals = all_festivals[have_lineup]
festivals = festivals.reset_index(drop=True)

festivals['id'] = pd.Series(all_festivals.index)

# View dataframe head
festivals.head()

Unnamed: 0,name,start_date,end_date,location,website,lineup,image,id
0,Savannah Music Festival 2017,2017-03-23,2017-04-08,"Savannah, GA",http://www.savannahmusicfestival.org/,"[the avett brothers, jason isbell, nikki lane,...",https://www.musicfestivalwizard.com/wp-content...,0
1,Ultra Miami 2017,2017-03-24,2017-03-26,"Miami, FL",http://www.ultramusicfestival.com/,"[chase & status, cypress hill, ice cube, justi...",https://www.musicfestivalwizard.com/wp-content...,1
2,Winter Wonder Grass Tahoe 2017,2017-03-30,2017-04-02,"Squaw Valley, CA",http://www.winterwondergrasstahoe.com/,"[greensky bluegrass, yonder mountain string ba...",https://www.musicfestivalwizard.com/wp-content...,2
3,Desert Hearts 2017,2017-03-31,2017-04-03,"Warner Springs, CA",http://www.deserthearts.us/,"[ardalan, atish, ben seagren, christian martin...",https://www.musicfestivalwizard.com/wp-content...,3
4,Fool's Paradise 2017,2017-03-31,2017-04-01,"St. Augustine, FL",http://www.foolsparadisefl.com/,"[lettuce, dumpstaphunk, the floozies, joe russ...",https://www.musicfestivalwizard.com/wp-content...,4


#### Spotify API Calls (Spotify Data pickled below)

In [7]:
# Initialize Spotify API package
ccm = SpotifyClientCredentials(
    client_id = config.client_id, 
    client_secret = config.client_secret
)

spotify = spotipy.Spotify(client_credentials_manager=ccm)

In [None]:
# Dictionary with artist info (genres, spotify uri, images, audio features)
artist_dict = defaultdict(dict)
unknowns = []

In [12]:
for lineup in tqdm(all_festivals.lineup):
    for artist in lineup:
        if artist not in artist_dict:
            search_result = spotify.search(q='artist:' + artist, type='artist')
            try:
                artist_dict[artist]['genres'] = search_result['artists']['items'][0]['genres']
                artist_dict[artist]['images'] = search_result['artists']['items'][0]['images']
                artist_dict[artist]['uri'] = search_result['artists']['items'][0]['uri']
            except IndexError:
                unknowns.append(artist)

100%|██████████| 206/206 [02:02<00:00,  1.68it/s]


In [49]:
for artist, data in artist_dict.items():
    artist_id = data['uri']
    top_tracks = spotify.artist_top_tracks(artist_id)['tracks']
    if top_tracks:
        track_features = []
        for i in range(len(top_tracks)):
            song_id = str(top_tracks[i]['uri'])
            features = spotify.audio_features(song_id)
            track_features.append(features)
        artist_dict[artist]['audio_feat'] = track_features

SpotifyException: http status: 401, code:-1 - https://api.spotify.com/v1/audio-features/?ids=6Db6GXcCXfDMgVHbSPRBZj:
 The access token expired

In [47]:
spotify.audio_features(str(spotify.artist_top_tracks(artist_dict['cage the elephant']['uri'])['tracks'][0]['uri']))

[{u'acousticness': 0.0438,
  u'analysis_url': u'https://api.spotify.com/v1/audio-analysis/3kb72STxc2959ZqsTwu52i',
  u'danceability': 0.634,
  u'duration_ms': 175493,
  u'energy': 0.849,
  u'id': u'3kb72STxc2959ZqsTwu52i',
  u'instrumentalness': 0,
  u'key': 0,
  u'liveness': 0.363,
  u'loudness': -7.075,
  u'mode': 1,
  u'speechiness': 0.105,
  u'tempo': 156.004,
  u'time_signature': 4,
  u'track_href': u'https://api.spotify.com/v1/tracks/3kb72STxc2959ZqsTwu52i',
  u'type': u'audio_features',
  u'uri': u'spotify:track:3kb72STxc2959ZqsTwu52i',
  u'valence': 0.91}]

In [31]:
for artist, data in tqdm(artist_dict.items()):
    try:
        if data['audio_feat']:
            averages = defaultdict(int)
            count = 0
            danceability = 0
            energy = 0
            key = 0
            loudness = 0
            speechiness = 0
            acousticness = 0
            instrumentalness = 0
            liveness = 0
            valence = 0
            tempo = 0
            for track in data['audio_feat']:
                count += 1.0
                danceability += track[0]['danceability']
                energy += track[0]['energy']
                key += track[0]['key']
                loudness += track[0]['loudness']
                speechiness += track[0]['speechiness']
                acousticness += track[0]['acousticness']
                instrumentalness += track[0]['instrumentalness']
                liveness += track[0]['liveness']
                valence += track[0]['valence']
                tempo += track[0]['tempo']
            averages['danceability'] = danceability / count
            averages['energy'] = energy / count
            averages['key'] = key / count
            averages['loudness'] = loudness / count
            averages['speechiness'] = speechiness / count
            averages['acousticness'] = acousticness / count
            averages['instrumentalness'] = instrumentalness / count
            averages['liveness'] = liveness / count
            averages['valence'] = valence / count
            averages['tempo'] = tempo / count
            artist_dict[artist]['averages'] = averages
    except KeyError:
        print artist






  0%|          | 0/3973 [00:00<?, ?it/s]

alaman
paul mckenna
savior adore
bill mathis
marcel dettman
ian winters
ryan herr
rare individuals
chukwudi hodge
white cliffs
bobby sessions
darko
reverend dollars
donormaal
zane lowe
kate nv
eddie quotez
new love
deradoorian
idris ackamoor & the pyramids
junior sisk & ramblers choice
the n.s.p
josefin rosen
shabaka hutchings
taylar elizza beth
nefe
joey landreth
choir choir choir!
dev3n
choir! choir! choir!
lophiile
labs
eddy boy
pat mcinerney
unexotic
domineko
brownout presents brown sabbath
velvet negroni
jackmaster
mikey cloud
down north
dj hanzel
yu su


[A[A[A[A[A




[A[A[A[A[A

TypeError: unsupported operand type(s) for +=: 'int' and 'NoneType'

In [470]:
# pickel for later
with open('artist_dict.pk1', 'wb') as picklefile:
    pickle.dump(artist_dict, picklefile)

### Music Festival Recommender

#### Get Genres

In [None]:
# Over 750 genres pulled from spotify, need to condense
genre_rename = pd.read_csv('music_genres.csv', header=None, names=['old_genre', 'new_genre'])

replace_dict = defaultdict(str)

for i in range(0, len(genre_rename)):
    replace_dict[genre_rename.ix[i,0]] = genre_rename.ix[i, 1]
    

In [467]:
# pickel for later
with open('replace_dict.pk1', 'wb') as picklefile:
    pickle.dump(replace_dict, picklefile)

In [438]:
# load pickeled spotify data
with open('artist_info.pk1', 'rb') as picklefile:
    artist_info = pickle.load(picklefile)

In [439]:
# Add genre counts to dataframe
festivals['genres'] = festivals['lineup'].apply(get_genres)

# List of bands not in spotify database
festivals['unknown'] = festivals['lineup'].apply(no_spotify_info)
festivals['num_bands'] = festivals['lineup'].apply(lambda x: len(x))
festivals['num_unknowns'] = festivals['unknown'].apply(lambda x: len(x))
festivals['unknown_percent'] = festivals['num_unknowns'] / festivals['num_bands']

In [469]:
with open('festivals.pk1', 'wb') as picklefile:
    pickle.dump(festivals, picklefile) 

In [None]:
# genres dictionary
all_genres = defaultdict(int)

for genre_list in festivals.genres:
    for artist_genres in genre_list:
        for genre in artist_genres:
            all_genres[genre] += 1
            

In [468]:
with open('all_genres.pk1', 'wb') as picklefile:
    pickle.dump(all_genres, picklefile)  

In [442]:
festivals['genres_revised'] = festivals['genres'].apply(make_genre_counts)

In [444]:
# Make DataFrame of festival genres
first = festivals.ix[0,18]
fest_genres = pd.DataFrame(first, index=[festivals.ix[0,1],])

for i in range(1, len(festivals)):
    d = festivals.ix[i, 18]
    df = pd.DataFrame(d, index=[festivals.ix[i,1],])
    fest_genres = fest_genres.append(df)
    
fest_genres = fest_genres.fillna(0)

In [445]:
# Drop festivals with low percentage of bands in spotify database
skip = [
    "CMA Music Festival 2017", 
    "Mad Tea Party Jam 2017", 
    "Highberry 2017", 
    "Orange Blossom Jamboree 2017",
    "Purple Hatters Ball 2017",
    "The Werk Out 2017"
    
]
fest_genres.drop(skip, inplace=True)

In [447]:
fest_genres_norm = pd.DataFrame(
    preprocessing.normalize(fest_genres, norm='l1'), 
    columns=list(fest_genres.columns),
    index=list(fest_genres.index)
)

In [466]:
# pickel for later
with open('fest_genres_norm.pk1', 'wb') as picklefile:
    pickle.dump(fest_genres_norm, picklefile)

In [449]:
# load pickeled genre data
with open('fest_genres_norm.pk1', 'rb') as picklefile:
    fest_genres_norm = pickle.load(picklefile)

In [511]:
user_artists = ['cage the elephant', 'chance the rapper', 'metric']

user = user_genres(user_artists)
user

Unnamed: 0,alternative_rock,electronic,folk,pop,rap,rock
User,0.14286,0.14286,0.14286,0.28571,0.14286,0.14286


In [512]:
user_df = fest_genres_norm.append(user)
user_df = user_df.fillna(0)
user_df

Unnamed: 0,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
Savannah Music Festival 2017,0.0,0.0,0.0,0.0,0.0,0.0,0.11111,0.0,0.0,0.0,0.0,0.0,0.0,0.37037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.25926,0.0,0.0,0.0,0.0,0.0,0.25926,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ultra Miami 2017,0.00455,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.02727,0.23636,0.0,0.0,0.0,0.00455,0.02273,0.35,0.0,0.00455,0.0,0.0,0.00455,0.07273,0.0,0.0,0.0,0.01364,0.0,0.00909,0.00455,0.0,0.1,0.03636,0.10455,0.00455,0.0
Winter Wonder Grass Tahoe 2017,0.0,0.0,0.02632,0.0,0.0,0.0,0.42105,0.0,0.0,0.0,0.0,0.0,0.0,0.36842,0.0,0.0,0.0,0.05263,0.0,0.0,0.0,0.0,0.05263,0.0,0.0,0.0,0.0,0.0,0.07895,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Desert Hearts 2017,0.0,0.05882,0.0,0.0,0.0,0.0,0.05882,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.58824,0.0,0.0,0.0,0.05882,0.0,0.11765,0.0,0.0,0.0,0.0,0.0,0.05882,0.0,0.0,0.05882,0.0,0.0,0.0,0.0
Fool's Paradise 2017,0.0,0.0,0.15385,0.0,0.0,0.0,0.07692,0.0,0.0,0.0,0.0,0.0,0.0,0.38462,0.23077,0.0,0.0,0.0,0.07692,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.07692
Edgefest Texas 2017,0.2963,0.0,0.07407,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03704,0.0,0.0,0.07407,0.0,0.0,0.0,0.03704,0.0,0.0,0.14815,0.0,0.07407,0.0,0.0,0.0,0.0,0.0,0.22222,0.03704,0.0,0.0,0.0,0.0,0.0,0.0
Country Thunder Arizona 2017,0.0,0.0,0.0,0.0,0.0,0.0,0.6129,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.03226,0.0,0.25806,0.03226,0.0,0.0,0.0,0.0,0.0,0.03226,0.0,0.0,0.0,0.0,0.0,0.03226,0.0
Euphoria Festival 2017,0.0,0.01087,0.0,0.0,0.0,0.0,0.0,0.02174,0.01087,0.03261,0.28261,0.0,0.0,0.05435,0.0,0.08696,0.21739,0.0,0.0,0.0,0.0,0.0,0.05435,0.0,0.0,0.03261,0.04348,0.0,0.0,0.03261,0.0,0.0,0.01087,0.1087,0.0,0.0
Snowbombing Canada 2017,0.01786,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01786,0.03571,0.26786,0.0,0.0,0.0,0.0,0.03571,0.28571,0.03571,0.0,0.0,0.0,0.0,0.08929,0.0,0.0,0.0,0.0,0.0,0.0,0.01786,0.0,0.01786,0.0,0.17857,0.0,0.0
Rhythm N Blooms 2017,0.0,0.03226,0.03226,0.0,0.0,0.0,0.19355,0.0,0.0,0.0,0.03226,0.0,0.0,0.32258,0.03226,0.0,0.0,0.06452,0.0,0.0,0.0,0.0,0.09677,0.0,0.0,0.0,0.0,0.0,0.16129,0.03226,0.0,0.0,0.0,0.0,0.0,0.0


In [550]:
fest_genres_norm.ix[65:66, :]

Unnamed: 0,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
Governors Ball Music Festival 2017,0.07092,0.02128,0.00709,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.14184,0.0,0.0,0.07801,0.00709,0.07092,0.08511,0.03546,0.0,0.0,0.01418,0.0,0.25532,0.0,0.0,0.03546,0.05674,0.0,0.04255,0.00709,0.0,0.0,0.0,0.07092,0.0,0.0


In [513]:
user_case = user_df.ix[-1,:]
distances = pd.DataFrame(
    cosine_similarity(fest_genres_norm, user_case.reshape(1, -1)),
    index = list(fest_genres_norm.index),
    columns=['distance']
)

In [514]:
distances['name'] = distances.index
distances['weighted_dist'] = distances.apply(add_weights, axis=1)
distances = distances.sort_values("weighted_dist", ascending=False)


In [515]:
distances.reset_index(drop=True)

Unnamed: 0,distance,name,weighted_dist
0,0.92936,Boston Calling 2017,1.22936
1,0.85931,Governors Ball Music Festival 2017,1.15931
2,0.83843,Bonnaroo Music Festival 2017,1.13843
3,0.82367,Warped Tour 2017,1.12367
4,0.9076,Forecastle Festival 2017,1.0576
5,0.88997,Shaky Knees Festival 2017,1.03997
6,0.87972,Osheaga 2017,1.02972
7,0.87745,Firefly Music Festival 2017,1.02745
8,0.86522,Sasquatch Festival 2017,1.01522
9,0.83296,Hangout Fest 2017,0.98296


In [464]:
distances = pd.merge(distances, festivals, on='name')
distances.drop(
    [
        'distance', 
        'description', 
        'url', 
        'num_bands', 
        'num_unknowns', 
        'unknown_percent', 
        'genres',
        'unknown'
    ], 
    axis=1, 
    inplace=True
)

In [465]:
distances

Unnamed: 0,name,weighted_dist,id,start_date,end_date,location,tickets,camping,website,lineup,poster,image,genres_revised
0,Firefly Music Festival 2017,1.05932,78,2017-06-15,2017-06-18,"Dover, DE",,Yes,http://fireflyfestival.com/,"[bob dylan, chance the rapper, the weeknd, twe...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 6, u'house': 22, u'pop': 57, u'dubs..."
1,Osheaga 2017,0.83302,181,2017-08-04,2017-08-06,"Montreal, QC",$320 CAD,No,http://www.osheaga.com/,"[muse, the weeknd, lorde, alabama shakes, just...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'techno': 3, u'hip_hop': 10, u'punk': 2, u'c..."
2,Bunbury Music Festival 2017,0.82814,71,2017-06-02,2017-06-04,"Cincinnati, OH",$179,No,http://bunburyfestival.com/,"[muse, wiz khalifa, g-eazy, bassnectar, pretty...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'reggae': 1, u'hip_hop': 6, u'punk': 1, u'in..."
3,Fort Rock 2017,0.81684,17,2017-04-29,2017-04-30,"Fort Myers, FL",,No,http://www.fortrockfestival.com/,"[def leppard, soundgarden, a perfect circle, t...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 2, u'metal': 20, u'emo': 2, u'rock'..."
4,Quebec City Summer Festival 2017,0.81636,192,2017-07-06,2017-07-16,"Quebec City, QC",$95,No,http://www.infofestival.com/Home/,"[amadou et mariam, andy shauf, arkells, backst...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'reggae': 1, u'hip_hop': 3, u'punk': 1, u'r&..."
5,Rocklahoma 2017,0.81597,61,2017-05-26,2017-05-28,"Pryor, OK",,Yes,http://www.rocklahoma.com/,"[soundgarden, def leppard, the offspring, ston...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'classic_rock': 1, u'metal': 28, u'emo': 3, ..."
6,Bonnaroo Music Festival 2017,0.81188,63,2017-06-08,2017-06-11,"Manchester, TN",$324.50,Yes,http://www.bonnaroo.com/,"[u2, red hot chili peppers, the weeknd, chance...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 6, u'house': 21, u'pop': 45, u'dubs..."
7,Carolina Rebellion 2017,0.80982,30,2017-05-05,2017-05-07,"Concord, NC",,Yes,http://www.carolinarebellion.com/,"[soundgarden, def leppard, avenged sevenfold, ...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'hip_hop': 1, u'indie': 2, u'metal': 38, u'e..."
8,Sunfest 2017,0.80505,33,2017-05-03,2017-05-07,"West Palm Beach, FL",$45-$70,No,http://www.sunfest.com/,"[blink-182, weezer, macklemore & ryan lewis, w...",[],https://www.musicfestivalwizard.com/wp-content...,"{u'reggae': 4, u'indie': 1, u'hip_hop': 3, u'd..."
9,Las Rageous 2017,0.80432,25,2017-04-21,2017-04-22,"Las Vegas, NV",$99,No,http://www.lasrageous.com/,"[godsmack, anthrax, coheed and cambria, killsw...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'metal': 11, u'emo': 1, u'alternative_rock':..."


In [456]:
distances.to_dict(orient='index')

{0: {u'camping': u' No',
  'end_date': Timestamp('2017-07-16 00:00:00'),
  'genres_revised': defaultdict(int,
              {'alternative_rock': 5,
               'ambient': 1,
               'electronic': 16,
               'folk': 20,
               'funk': 2,
               'hip_hop': 7,
               'house': 2,
               'indie': 2,
               'metal': 1,
               'pop': 29,
               'r&b': 4,
               'rap': 1,
               'rock': 12,
               'soul': 1,
               'trap': 4,
               'worship': 1}),
  'id': 107,
  u'image': u'https://www.musicfestivalwizard.com/wp-content/uploads/2012/02/Pitchfork_Music_Festival_Chicago_Logo.png',
  u'lineup': [u'lcd soundsystem',
   u'dirty projectors',
   u'danny brown',
   u'thurston moore',
   u'vince staples',
   u'arca',
   u'kamaiyah',
   u'hiss golden messenger',
   u'frankie cosmos',
   u'william tyler',
   u'dawn',
   u'priests',
   u'madame ghandi',
   u'a tribe called quest',
   u'pj har

In [458]:
festivals.head()

Unnamed: 0,id,name,start_date,end_date,location,tickets,camping,website,description,lineup,url,poster,image,genres,unknown,num_bands,num_unknowns,unknown_percent,genres_revised
0,10,Savannah Music Festival 2017,2017-03-23,2017-04-08,"Savannah, GA",,No,http://www.savannahmusicfestival.org/,THE SAVANNAH MUSIC FESTIVAL IS DEDICATED TO PR...,"[the avett brothers, jason isbell, nikki lane,...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...,"[[country, folk, pop, ambient, rock], [country...",[chicago blues meets gulf coast boogie: lurrie...,15,2,0.13333,"{u'country': 3, u'folk': 10, u'pop': 7, u'rock..."
1,1,Ultra Miami 2017,2017-03-24,2017-03-26,"Miami, FL",,No,http://www.ultramusicfestival.com/,"FOR EDM FANS AROUND THE WORLD, ULTRA IN MIAMI ...","[chase & status, cypress hill, ice cube, justi...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"[[dubstep, house, trap, electronic, metal, pop...","[sasha & john digweed, nwyr]",101,2,0.0198,"{u'jazz': 1, u'hip_hop': 5, u'house': 77, u'al..."
2,12,Winter Wonder Grass Tahoe 2017,2017-03-30,2017-04-02,"Squaw Valley, CA",,No,http://www.winterwondergrasstahoe.com/,"Lively national, regional and local bluegrass ...","[greensky bluegrass, yonder mountain string ba...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"[[country, folk, blues, rock], [country, folk,...","[sam bush band, the bluegrass generals, brad p...",26,4,0.15385,"{u'indie': 2, u'country': 16, u'pop': 2, u'roc..."
3,11,Desert Hearts 2017,2017-03-31,2017-04-03,"Warner Springs, CA",,Yes,http://www.deserthearts.us/,Desert Hearts is a bi-annual music and arts fe...,"[ardalan, atish, ben seagren, christian martin...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...,"[[metal, rock, ambient], [house], [house, pop]...","[deep jesus, evan casey, malcom brown, monolin...",37,6,0.16216,"{u'country': 1, u'rock': 1, u'ambient': 1, u'm..."
4,0,Fool's Paradise 2017,2017-03-31,2017-04-01,"St. Augustine, FL",$65-$250,Yes,http://www.foolsparadisefl.com/,Fool's Paradise is bringing your favorite band...,"[lettuce, dumpstaphunk, the floozies, joe russ...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"[[funk, folk, blues], [funk, folk, blues], [fo...","[joe russo's almost dead, manic science, fools...",11,3,0.27273,"{u'country': 1, u'jazz': 1, u'funk': 3, u'blue..."


### Artist Recommender

In [516]:
with open('artist_average.pk1', 'rb') as picklefile:
    artist_average = pickle.load(picklefile)

In [517]:
user_artists = ['the weeknd', 'chance the rapper', 'muse']

In [518]:
user_artist_average = defaultdict(dict)
for i, artist in enumerate(user_artists):
    user_artist_average['user' + str(i)] = track_averages(artist)

In [520]:
user_df = pd.DataFrame(user_artist_average).T

In [521]:
artists = pd.DataFrame(artist_average).T
artists = artists.append(user_df)
artists['genres'] = artists['genres'].apply(artist_genre_replace)
artists.tail()

Unnamed: 0,acousticness,danceability,energy,genres,instrumentalness,key,liveness,loudness,speechiness,tempo,valence
zoogma,0.01946,0.4577,0.7616,folk,0.34878,4.6,0.2804,-5.6239,0.08696,114.3065,0.561
zuluzuluu,0.24006,0.532,0.55425,,0.0007,3.875,0.12837,-8.72825,0.12476,100.65675,0.3891
user0,0.17535,0.6563,0.6019,pop,0.0,3.4,0.1709,-7.0208,0.12792,123.814,0.438
user1,0.41616,0.6574,0.5105,",rap",0.0,5.1,0.13641,-7.7691,0.27455,127.9201,0.46
user2,0.01988,0.5405,0.8236,"rock,metal,alternative_rock",0.01522,6.9,0.2087,-5.0495,0.06017,126.9441,0.4376


In [522]:
df_genre = artists['genres'].str.get_dummies(sep=',')
df_genre.head()

Unnamed: 0,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,jazz,latin,metal,pop,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,world,worship
#familygrind,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
'brink' brinkman,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
070 shake,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10 string symphony,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
100 mile house,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [523]:
artist_recommend = pd.concat([artists, df_genre], axis=1)

In [524]:
artist_recommend.drop('genres', axis=1, inplace=True)
artist_recommend.fillna(0)
artist_recommend.head()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,jazz,latin,metal,pop,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,world,worship
#familygrind,0.14474,0.591,0.6881,0.0,6.6,0.2233,-8.7772,0.3968,84.8542,0.5679,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
'brink' brinkman,0.4659,0.617,0.4112,0.00011,4.9,0.2129,-9.5486,0.03211,110.473,0.6034,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
070 shake,0.268,0.64743,0.52929,0.0,4.71429,0.21657,-8.55971,0.09321,114.36929,0.3019,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10 string symphony,0.9573,0.7103,0.17343,0.01511,3.6,0.09985,-11.3705,0.04971,116.1923,0.5246,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
100 mile house,0.7189,0.3987,0.3181,0.29889,4.2,0.12369,-13.5425,0.03412,120.9089,0.33048,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [525]:
user_fest = "Firefly Music Festival 2017"
lineup = festivals[festivals.name == user_fest]['lineup']
recommend_df = artist_recommend.loc[list(lineup)[0]]
recommend_df = recommend_df.fillna(0)
recommend_df

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,jazz,latin,metal,pop,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,world,worship
bob dylan,0.61179,0.4877,0.35543,0.01829,4.7,0.15351,-15.0576,0.04494,138.7041,0.4949,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
chance the rapper,0.41616,0.6574,0.5105,0.0,5.1,0.13641,-7.7691,0.27455,127.9201,0.46,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the weeknd,0.17535,0.6563,0.6019,0.0,3.4,0.1709,-7.0208,0.12792,123.814,0.438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
twenty one pilots,0.08144,0.6551,0.6396,0.00013,4.7,0.11614,-6.5414,0.06305,114.766,0.54,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
muse,0.01988,0.5405,0.8236,0.01522,6.9,0.2087,-5.0495,0.06017,126.9441,0.4376,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
flume,0.22171,0.5466,0.6061,0.00063,7.1,0.1194,-5.8825,0.11562,111.9679,0.3199,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
weezer,0.02798,0.5517,0.7455,0.03499,5.4,0.13042,-5.8304,0.05187,109.5974,0.54993,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
the shins,0.1584,0.5502,0.7153,0.01884,2.8,0.25556,-6.8871,0.03715,125.2453,0.6268,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
thirty seconds to mars,0.028,0.4457,0.8518,0.02992,6.2,0.24206,-4.5112,0.06744,147.6048,0.2156,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
dillon francis,0.04023,0.6848,0.8557,0.08756,4.1,0.26248,-3.5772,0.10561,121.5386,0.5184,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [526]:
user_df = artist_recommend.loc[list(user_df.index)]
user_df = user_df.fillna(0)
user_df

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,jazz,latin,metal,pop,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,world,worship
user0,0.17535,0.6563,0.6019,0.0,3.4,0.1709,-7.0208,0.12792,123.814,0.438,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
user1,0.41616,0.6574,0.5105,0.0,5.1,0.13641,-7.7691,0.27455,127.9201,0.46,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
user2,0.01988,0.5405,0.8236,0.01522,6.9,0.2087,-5.0495,0.06017,126.9441,0.4376,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0


In [527]:
artist_distances = pd.DataFrame(list(recommend_df.index), index=list(recommend_df.index), columns=['names'])

for i in range(0, len(user_df)):
    user_case = user_df.ix[i,:]
    distances = pd.DataFrame(
        euclidean_distances(recommend_df, user_case.reshape(1, -1)),
        index = list(recommend_df.index),
        columns=['distance' + str(i)]
    )
    artist_distances = pd.concat([artist_distances, distances], axis=1)

In [528]:
artist_distances['min'] = artist_distances[list(artist_distances.columns[1:])].min(axis=1)

In [529]:
artist_distances.sort_values("min")

Unnamed: 0,names,distance0,distance1,distance2,min
chance the rapper,chance the rapper,4.73281,0.0,3.98867,0.0
the weeknd,the weeknd,0.0,4.73281,5.47965,0.0
muse,muse,5.47965,3.98867,0.0,0.0
vita and the woolf,vita and the woolf,5.89949,1.85818,3.65758,1.85818
alex wiley,alex wiley,2.15128,4.56241,5.25109,2.15128
bishop briggs,bishop briggs,5.27155,2.18754,4.3799,2.18754
salt cathedral,salt cathedral,2.24925,3.46166,4.73516,2.24925
dead man fall,dead man fall,4.22232,2.35691,3.50855,2.35691
sub-radio,sub-radio,5.53796,2.36228,2.51235,2.36228
animal years,animal years,2.53633,2.91257,3.92243,2.53633


In [260]:
from sklearn.metrics.pairwise import euclidean_distances
from sklearn.metrics.pairwise import manhattan_distances

In [498]:
df = artist_distances[['names', 'min']]
df['pic'] = df['names'].apply(get_image)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [None]:
results_dict = df.to_dict(orient='index')
results = []
for key, value in results_dict.items():
    results.append(value)

### Data for D3 visualization

In [376]:
d3 = deepcopy(festivals)

In [377]:
d3.name = d3['name'].apply(lambda x: x[:-5])

In [378]:
d3 = d3.drop([
        'tickets', 
        'camping', 
        'description', 
        'url', 
        'unknown', 
        'num_bands', 
        'num_unknowns', 
        'unknown_percent',
        'genres', 
        'genres_revised',
        'lineup', 
        'image'
    ], axis=1)

In [379]:
d3.head()

Unnamed: 0,name,start_date,end_date,location,website,poster
0,Savannah Music Festival,2017-03-23,2017-04-08,"Savannah, GA",http://www.savannahmusicfestival.org/,[]
1,Ultra Miami,2017-03-24,2017-03-26,"Miami, FL",http://www.ultramusicfestival.com/,[https://www.musicfestivalwizard.com/wp-conten...
2,Winter Wonder Grass Tahoe,2017-03-30,2017-04-02,"Squaw Valley, CA",http://www.winterwondergrasstahoe.com/,[https://www.musicfestivalwizard.com/wp-conten...
3,Desert Hearts,2017-03-31,2017-04-03,"Warner Springs, CA",http://www.deserthearts.us/,[]
4,Fool's Paradise,2017-03-31,2017-04-01,"St. Augustine, FL",http://www.foolsparadisefl.com/,[https://www.musicfestivalwizard.com/wp-conten...


In [None]:
from geopy.geocoders import Nominatim
from time import sleep

geolocator = Nominatim()

def get_lat_long(loc):
    try:
        location = geolocator.geocode(loc)
        return [location.latitude, location.longitude]
    except:
        print "failed {}".format(loc)
        pass

coordinates = []
for location in d3.location:
    coordinates.append(get_lat_long(location))

In [352]:
coordinates[37] = get_lat_long("Atlanta, GA")
coordinates[58] = get_lat_long("Las Vegas, NV")
coordinates[68] = get_lat_long("Toronto, ON")
coordinates[75] = get_lat_long("Nashville, TN")
coordinates[77] = get_lat_long("Bethel, NY")
coordinates[105] = get_lat_long("Quincy, CA")
coordinates[108] = get_lat_long("Quebec City, QC")
coordinates[125] = get_lat_long("Squaw Valley, CA")
coordinates[135] = get_lat_long("Detroit, MI")
coordinates[137] = get_lat_long("Camrose, AB")
coordinates[148] = get_lat_long("Calgary, AB, CA")
coordinates[140] = get_lat_long("Montreal, Canada")
coordinates[98] = get_lat_long("Montebello, Canada")
coordinates[103] = get_lat_long("Montreal, Canada")
coordinates[146] = get_lat_long("Ochoco National Forest, OR")

failed Montebello, QB, CA
failed Montreal, QB, CA
failed Detroit, MI
failed Montreal, QB, CA
failed Big Summit Prairie, OR


In [380]:
d3['coordinates'] = pd.Series(coordinates)

In [382]:
d3.index = list(d3.name)
d3 = d3.drop('name', axis=1)

In [383]:
d3.head()

Unnamed: 0,start_date,end_date,location,website,poster,coordinates
Savannah Music Festival,2017-03-23,2017-04-08,"Savannah, GA",http://www.savannahmusicfestival.org/,[],"[32.0835407, -81.0998341]"
Ultra Miami,2017-03-24,2017-03-26,"Miami, FL",http://www.ultramusicfestival.com/,[https://www.musicfestivalwizard.com/wp-conten...,"[25.7742658, -80.1936588]"
Winter Wonder Grass Tahoe,2017-03-30,2017-04-02,"Squaw Valley, CA",http://www.winterwondergrasstahoe.com/,[https://www.musicfestivalwizard.com/wp-conten...,"[36.7402261, -119.2467849]"
Desert Hearts,2017-03-31,2017-04-03,"Warner Springs, CA",http://www.deserthearts.us/,[],"[33.2822596, -116.6336302]"
Fool's Paradise,2017-03-31,2017-04-01,"St. Augustine, FL",http://www.foolsparadisefl.com/,[https://www.musicfestivalwizard.com/wp-conten...,"[29.8946952, -81.3145394]"


In [411]:
d3_data = defaultdict(list)

for index in d3.index:
    d3_data[index] = [
        d3.loc[index, 'coordinates'][0], 
        d3.loc[index, 'coordinates'][1],
        d3.loc[index, 'start_date'].strftime('%B %d %Y'), 
        d3.loc[index, 'end_date'].strftime('%B %d %Y'), 
        d3.loc[index, 'location'], 
        d3.loc[index, 'website'], 
        d3.loc[index, 'poster']
    ]

In [412]:
d3_data

defaultdict(list,
            {u'4 Peaks Music Festival': [44.0581728,
              -121.3153095,
              'June 15 2017',
              'June 18 2017',
              u'Bend, OR',
              u'http://4peaksmusic.com/',
              [u'https://www.musicfestivalwizard.com/wp-content/uploads/2016/11/4-Peaks-2017-Lineup-oster.jpg']],
             u'Arise Music Festival': [40.3977612,
              -105.07498,
              'August 04 2017',
              'August 07 2017',
              u'Loveland, CO',
              u'http://www.arisefestival.com',
              [u'https://www.musicfestivalwizard.com/wp-content/uploads/2016/09/ARISE-2017-Lineup-Festival.jpg']],
             u'Arroyo Seco Weekend': [34.1476452,
              -118.1444778,
              'June 24 2017',
              'June 25 2017',
              u'Pasadena, Ca',
              u'https://www.arroyosecoweekend.com/',
              [u'https://www.musicfestivalwizard.com/wp-content/uploads/2017/03/Arroyo-Seco-2017-Lineu

In [413]:
import json
with open('map_data.json', 'w') as fp:
    json.dump(d3_data, fp)

u'https://i.scdn.co/image/1c903e1c9fb2ffb682de31ad1f66eb8bc86a2b69'

In [None]:
kmeans = KMeans(n_clusters=8, random_state=0)
kmeans.fit(fest_genres_norm)

In [None]:
pd.Series(kmeans.labels_).value_counts()

In [None]:
fest_genres['clusters'] = kmeans.labels_

In [None]:
single_cluster = fest_genres[fest_genres['clusters'] == 7]
festivals[festivals['name'].isin(list(single_cluster.index))]