## Festival Recommendation 

### Contents

- Imports and Options
- Formulas
- Read and Clean Scrapy Data
- Spotify API calls

#### Imports and Options

In [48]:
import pandas as pd
from datetime import datetime, timedelta
from collections import defaultdict
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
import pickle
import time
from sklearn import preprocessing
from sklearn.cluster import KMeans
from tqdm import tqdm
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
import config
from copy import deepcopy

%matplotlib inline

pd.options.display.float_format = '{:20,.5f}'.format
pd.options.display.max_columns = 1000
pd.options.display.max_rows = 1000

#### Formulas

In [2]:
def date_split(dates, position=0):
    ''' Splits festival dates and returns start date(position = 0) or end date(position = 1)'''
    dates = dates.split('-')
    if position:
        end_date = dates[1]
        end_date = datetime.strptime(end_date, '%B %d, %Y')
        return end_date
    else:
        start_date = dates[0] + ' ' + dates[1][-4:]
        start_date = datetime.strptime(start_date, '%B %d %Y')
        return start_date
    

def make_lower(lineup):
    ''' Takes artist lineup and returns list with names lowcase and whitespace stripped'''
    lineup_new = []
    for artist in lineup:
        artist = artist.strip()
        artist = artist.lower()
        lineup_new.append(artist)
    return lineup_new


def replace_genre(genre):
    genre = genre.replace(' ', '_')
    genre = genre.replace('-', '_')
    new_genre = replace_dict[genre]
    return new_genre
        

def get_genres(lineup):
    '''Takes artists in lineup and returns list of genres'''
    genre_list =[]
    for artist in lineup:
        try:
            genres = artist_info[artist]['genres']
            artist_genres =[]
            for genre in genres:
                new_genre = replace_genre(genre)
                if new_genre not in artist_genres and new_genre != '':
                    artist_genres.append(new_genre)
            if artist_genres:
                genre_list.append(artist_genres)
        except KeyError:
            next
    return genre_list
    
    
def genre_dict(lineup):
    '''Takes artist lineup and returns genre counts'''
    artist_genres = defaultdict(int)
    for artist in lineup:
        try:
            genres = artist_info[artist]['genres']
            for genre in genres:
                artist_genres[genre] += 1
        except KeyError:
            next
    return artist_genres


def no_spotify_info(lineup):
    '''Takes artist lineup and returns artist not in Spotify database'''
    unknown = []
    for artist in lineup:
        try:
            genres = artist_info[artist]['genres']
        except KeyError:
            unknown.append(artist)
    return unknown


def top_three(genre_list):
    if len(genre_list) < 4:
        return genre_list
    else:
        most_pop = []     
        for genre in genre_list:
            most_pop.append((all_genres[genre], genre))
        most_pop.sort(reverse=True)
        selection = []
        for i in range(0, 3):
            top_genre = most_pop[i][1]
            selection.append(top_genre)
        return selection

def make_genre_counts(genre_lists):
    lineup_genres = defaultdict(int)
    for genre_list in genre_lists:
        if genre_list:
            genre_list = top_three(genre_list)
            for genre in genre_list:
                lineup_genres[genre] += 1
    return lineup_genres


def user_genres(artists):
    genres_dict = defaultdict(int)
    for artist in artists:
        try:
            genre_result = spotify.search(q='artist:' + artist, type='artist')['artists']['items'][0]['genres']
            genre_list = []
            for genre in genre_result:
                new_genre = replace_genre(genre)
                if new_genre not in genre_list:
                    genre_list.append(new_genre)
            genre_list = top_three(genre_list)
            for genre in genre_list:
                genres_dict[genre] += 1
        except IndexError:
            next
    df = pd.DataFrame(genres_dict, index=["User",])
    df_norm = pd.DataFrame(preprocessing.normalize(df, norm='l1'), index=["User",], columns=list(df.columns))
    return df_norm


def add_weights(x):
    fest_name = x[1]
    lineup = list(festivals[festivals.name == fest_name]['lineup'])[0]
    count = 0
    for artist in user_artists:
        if artist in lineup:
            count += 1
    new_dist = x[0] + (count * 0.15)
    return new_dist


def artist_genre_replace(g_list):
    genre_string = ''
    if g_list:
        for i in range(0, len(g_list)):
            g_list[i] = replace_genre(g_list[i])
        new_genres = list(set(g_list))
        genre_string = ",".join(new_genres)
    return genre_string
            
    
def track_averages(artist):
    averages = defaultdict(int)
    if artist in artist_average:
        return artist_average[artist]
    else:
        count = 0
        danceability = 0
        energy = 0
        key = 0
        loudness = 0
        speechiness = 0
        acousticness = 0
        instrumentalness = 0
        liveness = 0
        valence = 0
        tempo = 0
        try:
            artist_id = spotify.search(q='artist:' + artist, type='artist')['artists']['items'][0]['uri']
            top_tracks = spotify.artist_top_tracks(artist_id)
            track_features = []
            for i in range(0, len(top_tracks['tracks'])):
                song_id = str(top_tracks['tracks'][i]['uri'])
                features = spotify.audio_features(song_id)
                track_features.append(features)
            for track in track_features:
                count += 1.0
                danceability += track[0]['danceability']
                energy += track[0]['energy']
                key += track[0]['key']
                loudness += track[0]['loudness']
                speechiness += track[0]['speechiness']
                acousticness += track[0]['acousticness']
                instrumentalness += track[0]['instrumentalness']
                liveness += track[0]['liveness']
                valence += track[0]['valence']
                tempo += track[0]['tempo']
        except TypeError:
                next
        if count == 0:
            count = 1
        averages['danceability'] = danceability / count
        averages['energy'] = energy / count
        averages['key'] = key / count
        averages['loudness'] = loudness / count
        averages['speechiness'] = speechiness / count
        averages['acousticness'] = acousticness / count
        averages['instrumentalness'] = instrumentalness / count
        averages['liveness'] = liveness / count
        averages['valence'] = valence / count
        averages['tempo'] = tempo / count
        try:
            averages['genres'] = spotify.search(q='artist:' + artist, type='artist')['artists']['items'][0]['genres']
        except IndexError:
            averages['genres'] = []
        return averages
    
    
def get_image(x):
    try:
        return artist_info[x]['images'][0]['url']
    except IndexError:
        return 'http://www2.pictures.zimbio.com/mp/RyOQVmpiyZB+O937YJarJVm+594x400.jpg'
    except KeyError:
        return 'http://www2.pictures.zimbio.com/mp/RyOQVmpiyZB+O937YJarJVm+594x400.jpg'

#### Read and Clean Scrapy Data

In [3]:
# Read in festival data
all_festivals = pd.read_json("festivals.json")

In [4]:
# Formatt columns
all_festivals['start_date'] = all_festivals.dates.apply(date_split)
all_festivals['end_date'] = all_festivals.dates.apply(lambda x: date_split(x, 1))
all_festivals['duration'] = all_festivals.end_date - all_festivals.start_date + timedelta(days=1)
all_festivals['lineup'] = all_festivals.lineup.apply(make_lower)
all_festivals['id'] = pd.Series(all_festivals.index)


# Reorder columns and sort rows by date 
column_order = [
    'id',
    'name', 
    'start_date', 
    'end_date', 
    'location', 
    'tickets', 
    'camping', 
    'website', 
    'description', 
    'lineup', 
    'url', 
    'poster', 
    'image'
]
all_festivals = all_festivals[column_order]
all_festivals.sort_values('start_date', inplace=True)
all_festivals = all_festivals.reset_index(drop=True)

# Remove festivals without lineups
have_lineup = []

for fest_lineup in all_festivals.lineup:
    if len(fest_lineup) >= 10:
        have_lineup.append(True)
    else:
        have_lineup.append(False)
        
festivals = all_festivals[have_lineup]
festivals = festivals.reset_index(drop=True)

# View dataframe head
festivals.head()

Unnamed: 0,id,name,start_date,end_date,location,tickets,camping,website,description,lineup,url,poster,image
0,16,Northside Festival Brooklyn 2017,2017-06-07,2017-06-11,"Brooklyn, NY",$75-$549,No,http://www.northsidefestival.com/,North Brooklyn is transformed into an urban fe...,"[miguel, dirty projectors, downtown boys, elvi...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...
1,1,Bonnaroo Music Festival 2017,2017-06-08,2017-06-11,"Manchester, TN",$324.50,Yes,http://www.bonnaroo.com/,,"[u2, lorde, travis scott, future islands, tory...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...
2,3,Blue Ox Music Festival 2017,2017-06-08,2017-06-10,"Eau Claire, WI",,Yes,http://blueoxmusicfestival.com/,,"[pert near sandstone, punch brothers, greensky...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...
3,18,Disc Jam 2017,2017-06-08,2017-06-11,"Stephentown, NY",,Yes,http://www.discjammusicfestival.com/,,"[dopapod, electron, kung fu, tauk, pink talkin...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...
4,17,CMA Music Festival 2017,2017-06-08,2017-06-11,"Nashville, TN",,No,http://www.cmaworld.com/cma-music-festival/,,"[a thousand horses, aaron watson, abby anderso...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...


#### Spotify API Calls (Spotify Data pickled below)

In [5]:
# Initialize Spotify API package
ccm = SpotifyClientCredentials(
    client_id = config.client_id, 
    client_secret = config.client_secret
)

spotify = spotipy.Spotify(client_credentials_manager=ccm)

In [6]:
with open('artist_info.pk1', 'rb') as picklefile:
    artist_info = pickle.load(picklefile)

In [10]:
# Dictionary with artist info
# comment out line below after first run
# artist_info = defaultdict(dict)

for lineup in tqdm(all_festivals.lineup):
    for artist in lineup:
        if artist not in artist_info:
            search_result = spotify.search(q='artist:' + artist, type='artist')
            try:
                artist_info[artist] = search_result['artists']['items'][0]
            except IndexError:
                next

# pickel for later
with open('artist_info.pk1', 'wb') as picklefile:
    pickle.dump(artist_info, picklefile)

100%|██████████| 204/204 [02:18<00:00,  1.47it/s]


In [7]:
with open('artist_tracks.pk1', 'rb') as picklefile:
    artist_tracks = pickle.load(picklefile)

In [12]:
# Dictionary with artist top tracks
# comment out line below after first run
# artist_tracks = defaultdict(dict)

for artist, info in tqdm(artist_info.items()):
    if artist not in artist_tracks:
        try:
            artist_id = info['uri']
            top_tracks = spotify.artist_top_tracks(artist_id)
            artist_tracks[artist] = top_tracks
        except KeyError:
            next
        
# pickel for later
with open('artist_tracks.pk1', 'wb') as picklefile:
    pickle.dump(artist_tracks, picklefile)

100%|██████████| 5932/5932 [05:03<00:00, 19.56it/s]


In [8]:
with open('audio_features.pk1', 'rb') as picklefile:
    audio_features = pickle.load(picklefile)

In [9]:
# Dictionary with track features
# comment out line below after first run
# audio_features = defaultdict(list)

for artist, track in tqdm(artist_tracks.items()):
    if artist not in audio_features:
        track_features = []
        for i in range(0, len(track['tracks'])):
            try:
                song_id = str(track['tracks'][i]['uri'])
                features = spotify.audio_features(song_id)
                track_features.append(features)
            except ValueError:
                next
        audio_features[artist] = track_features

# pickel for later
with open('audio_features.pk1', 'wb') as picklefile:
    pickle.dump(audio_features, picklefile)

100%|██████████| 5343/5343 [1:01:44<00:00,  1.44it/s]


In [10]:
with open('artist_average.pk1', 'rb') as picklefile:
    artist_average = pickle.load(picklefile)

In [11]:
# Dictionary with artist average track features
# comment out line below after first run
# artist_average = defaultdict(dict)

for artist, tracks in tqdm(audio_features.items()):
    if artist not in artist_average:
        averages = defaultdict(int)
        count = 0
        danceability = 0
        energy = 0
        key = 0
        loudness = 0
        speechiness = 0
        acousticness = 0
        instrumentalness = 0
        liveness = 0
        valence = 0
        tempo = 0
        for track in tracks:
            try:
                count += 1.0
                danceability += track[0]['danceability']
                energy += track[0]['energy']
                key += track[0]['key']
                loudness += track[0]['loudness']
                speechiness += track[0]['speechiness']
                acousticness += track[0]['acousticness']
                instrumentalness += track[0]['instrumentalness']
                liveness += track[0]['liveness']
                valence += track[0]['valence']
                tempo += track[0]['tempo']
            except TypeError:
                continue
        if count == 0:
            continue
        averages['danceability'] = danceability / count
        averages['energy'] = energy / count
        averages['key'] = key / count
        averages['loudness'] = loudness / count
        averages['speechiness'] = speechiness / count
        averages['acousticness'] = acousticness / count
        averages['instrumentalness'] = instrumentalness / count
        averages['liveness'] = liveness / count
        averages['valence'] = valence / count
        averages['tempo'] = tempo / count
        averages['genres'] = artist_info[artist]['genres']
        artist_average[artist] = averages
        
# pickel for later
with open('artist_average.pk1', 'wb') as picklefile:
    pickle.dump(artist_average, picklefile)

100%|██████████| 5343/5343 [00:00<00:00, 71305.69it/s]


### Music Festival Recommender

#### Get Genres

In [12]:
# Over 750 genres pulled from spotify, need to condense
genre_rename = pd.read_csv('music_genres.csv', header=None, names=['old_genre', 'new_genre'])

replace_dict = defaultdict(str)

for i in range(0, len(genre_rename)):
    replace_dict[genre_rename.ix[i,0]] = genre_rename.ix[i, 1]
    

In [13]:
# pickel for later
with open('replace_dict.pk1', 'wb') as picklefile:
    pickle.dump(replace_dict, picklefile)

In [14]:
# load pickeled spotify data
with open('artist_info.pk1', 'rb') as picklefile:
    artist_info = pickle.load(picklefile)

In [15]:
# Add genre counts to dataframe
festivals['genres'] = festivals['lineup'].apply(get_genres)

# List of bands not in spotify database
festivals['unknown'] = festivals['lineup'].apply(no_spotify_info)
festivals['num_bands'] = festivals['lineup'].apply(lambda x: len(x))
festivals['num_unknowns'] = festivals['unknown'].apply(lambda x: len(x))
festivals['unknown_percent'] = festivals['num_unknowns'] / festivals['num_bands']

In [16]:
with open('festivals.pk1', 'wb') as picklefile:
    pickle.dump(festivals, picklefile) 

In [17]:
# genres dictionary
all_genres = defaultdict(int)

for genre_list in festivals.genres:
    for artist_genres in genre_list:
        for genre in artist_genres:
            all_genres[genre] += 1
            

In [18]:
with open('all_genres.pk1', 'wb') as picklefile:
    pickle.dump(all_genres, picklefile)  

In [19]:
festivals['genres_revised'] = festivals['genres'].apply(make_genre_counts)

In [20]:
# Make DataFrame of festival genres
first = festivals.ix[0,18]
fest_genres = pd.DataFrame(first, index=[festivals.ix[0,1],])

for i in range(1, len(festivals)):
    d = festivals.ix[i, 18]
    df = pd.DataFrame(d, index=[festivals.ix[i,1],])
    fest_genres = fest_genres.append(df)
    
fest_genres = fest_genres.fillna(0)

In [21]:
fest_genres_norm = pd.DataFrame(
    preprocessing.normalize(fest_genres, norm='l1'), 
    columns=list(fest_genres.columns),
    index=list(fest_genres.index)
)

In [22]:
# pickel for later
with open('fest_genres_norm.pk1', 'wb') as picklefile:
    pickle.dump(fest_genres_norm, picklefile)

In [23]:
# load pickeled genre data
with open('fest_genres_norm.pk1', 'rb') as picklefile:
    fest_genres_norm = pickle.load(picklefile)

In [24]:
user_artists = ['cage the elephant', 'chance the rapper', 'metric']

user = user_genres(user_artists)
user

Unnamed: 0,alternative_rock,electronic,pop,rap,rock
User,0.14286,0.14286,0.28571,0.14286,0.28571


In [25]:
user_df = fest_genres_norm.append(user)
user_df = user_df.fillna(0)
user_df

Unnamed: 0,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
Northside Festival Brooklyn 2017,0.08219,0.0,0.0,0.0,0.0,0.0,0.0,0.0137,0.0,0.0,0.19178,0.0274,0.0137,0.12329,0.0,0.0137,0.0,0.0137,0.0137,0.0,0.0,0.0,0.21918,0.0137,0.0411,0.0274,0.0,0.0,0.16438,0.0,0.0,0.0,0.0,0.0137,0.0,0.0274
Bonnaroo Music Festival 2017,0.05714,0.01786,0.01071,0.00357,0.0,0.0,0.03571,0.0,0.0,0.01071,0.125,0.0,0.00357,0.08571,0.0,0.05,0.08571,0.03214,0.01071,0.0,0.00714,0.00714,0.2,0.0,0.0,0.025,0.025,0.00357,0.08929,0.01786,0.00357,0.00357,0.0,0.075,0.00714,0.00714
Blue Ox Music Festival 2017,0.0,0.02439,0.02439,0.0,0.0,0.0,0.36585,0.0,0.0,0.0,0.0,0.0,0.0,0.34146,0.0,0.0,0.0,0.0,0.0,0.0,0.02439,0.0,0.04878,0.0,0.0,0.0,0.0,0.0,0.17073,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Disc Jam 2017,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0,0.3125,0.0,0.125,0.0625,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0,0.0625,0.0,0.0625,0.0,0.0,0.0,0.0,0.0625,0.0625,0.0
CMA Music Festival 2017,0.01149,0.04598,0.0,0.0,0.0,0.0,0.53448,0.00575,0.0,0.0,0.0,0.0,0.0,0.00575,0.0,0.0,0.0,0.01149,0.0,0.0,0.0,0.27586,0.04023,0.0,0.0,0.0,0.0,0.0,0.06322,0.00575,0.0,0.0,0.0,0.0,0.0,0.0
Mysteryland USA 2017,0.01351,0.00676,0.0,0.0,0.0,0.0,0.0,0.0,0.02027,0.01351,0.24324,0.0,0.0,0.01351,0.0,0.04054,0.25,0.00676,0.00676,0.0,0.00676,0.0,0.10135,0.0,0.0,0.00676,0.02027,0.00676,0.04054,0.01351,0.0,0.03378,0.0,0.14189,0.01351,0.0
Spring Awakening 2017,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01282,0.33333,0.0,0.0,0.0,0.0,0.01282,0.33333,0.0,0.0,0.0,0.0,0.0,0.11538,0.0,0.0,0.0,0.0,0.0,0.01282,0.0,0.0,0.01282,0.01282,0.15385,0.0,0.0
LIVE 105’s BFD 2017,0.13208,0.01887,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.18868,0.0,0.0,0.0,0.0,0.0,0.03774,0.01887,0.0,0.0,0.03774,0.0,0.26415,0.0,0.01887,0.03774,0.0,0.0,0.13208,0.0566,0.0,0.0,0.0,0.03774,0.0,0.01887
LaureLive 2017,0.08511,0.02128,0.08511,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.06383,0.0,0.0,0.17021,0.0,0.0,0.02128,0.08511,0.0,0.0,0.0,0.0,0.29787,0.0,0.0,0.0,0.0,0.0,0.14894,0.02128,0.0,0.0,0.0,0.0,0.0,0.0
Punk Rock Bowling NJ 2017,0.09677,0.0,0.03226,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.09677,0.0,0.0,0.03226,0.0,0.0,0.0,0.0,0.0,0.19355,0.0,0.06452,0.03226,0.22581,0.0,0.0,0.0,0.19355,0.03226,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
fest_genres_norm.ix[65:66, :]

Unnamed: 0,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
Northwest String Summit 2017,0.0,0.0,0.04444,0.0,0.0,0.0,0.35556,0.0,0.0,0.0,0.0,0.0,0.0,0.37778,0.0,0.0,0.0,0.02222,0.0,0.0,0.0,0.0,0.04444,0.0,0.0,0.0,0.0,0.0,0.15556,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [27]:
user_case = user_df.ix[-1,:]
distances = pd.DataFrame(
    cosine_similarity(fest_genres_norm, user_case.reshape(1, -1)),
    index = list(fest_genres_norm.index),
    columns=['distance']
)

In [28]:
distances['name'] = distances.index
distances['weighted_dist'] = distances.apply(add_weights, axis=1)
distances = distances.sort_values("weighted_dist", ascending=False)


In [29]:
distances.reset_index(drop=True)

Unnamed: 0,distance,name,weighted_dist
0,0.77068,Bonnaroo Music Festival 2017,1.07068
1,0.75721,Lollapalooza Chicago 2017,1.05721
2,0.84221,Float Fest 2017,0.99221
3,0.81255,Austin City Limits 2017,0.96255
4,0.81002,Osheaga 2017,0.96002
5,0.79944,Firefly Music Festival 2017,0.94944
6,0.79438,Lost Lake Festival 2017,0.94438
7,0.79013,Karoondinha Festival 2017,0.94013
8,0.74548,Eaux Claires 2017,0.89548
9,0.89501,KAABOO 2017,0.89501


In [30]:
distances = pd.merge(distances, festivals, on='name')
distances.drop(
    [
        'distance', 
        'description', 
        'url', 
        'num_bands', 
        'num_unknowns', 
        'unknown_percent', 
        'genres',
        'unknown'
    ], 
    axis=1, 
    inplace=True
)

In [31]:
distances

Unnamed: 0,name,weighted_dist,id,start_date,end_date,location,tickets,camping,website,lineup,poster,image,genres_revised
0,Bonnaroo Music Festival 2017,1.07068,1,2017-06-08,2017-06-11,"Manchester, TN",$324.50,Yes,http://www.bonnaroo.com/,"[u2, lorde, travis scott, future islands, tory...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 9, u'house': 24, u'pop': 56, u'dubs..."
1,Lollapalooza Chicago 2017,1.05721,127,2017-08-03,2017-08-06,"Chicago, IL",$120-$335,No,http://www.lollapalooza.com/,"[21 savage, 3lou, 6lack, 888, alison wonderlan...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 6, u'house': 31, u'pop': 73, u'dubs..."
2,Float Fest 2017,0.99221,104,2017-07-22,2017-07-23,"San Marcos, TX",$99,Yes,http://floatfest.net/,"[snakeships, neon indian, moon taxi, mike jone...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'hip_hop': 3, u'indie': 3, u'house': 1, u'al..."
3,Austin City Limits 2017,0.96255,173,2017-10-06,2017-10-15,"Austin, TX",,No,http://www.aclfestival.com/,"[jay z, red hot chili peppers, chance the rapp...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 8, u'house': 14, u'pop': 54, u'rap'..."
4,Osheaga 2017,0.96002,67,2017-08-04,2017-08-06,"Montreal, QC",$320 CAD,No,http://www.osheaga.com/,"[liam gallagher, death from above 1979, london...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'techno': 3, u'ambient': 4, u'classical': 1,..."
5,Firefly Music Festival 2017,0.94944,11,2017-06-15,2017-06-18,"Dover, DE",,Yes,http://fireflyfestival.com/,"[bob dylan, chance the rapper, the weeknd, twe...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 6, u'house': 23, u'pop': 59, u'dubs..."
6,Lost Lake Festival 2017,0.94438,188,2017-10-20,2017-10-22,"Phoenix, AZ",,,https://www.lostlakefestival.com/,"[the killers, chance the rapper, major lazer, ...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'latin': 2, u'hip_hop': 9, u'house': 7, u'ro..."
7,Karoondinha Festival 2017,0.94013,105,2017-07-21,2017-07-23,"Centre Hall, PA",$249,Yes,http://karoondinha.com,"[chance the rapper, john legend, odesza, param...",[],https://www.musicfestivalwizard.com/wp-content...,"{u'trance': 1, u'alternative_rock': 6, u'hip_h..."
8,Eaux Claires 2017,0.89548,14,2017-06-16,2017-06-17,"Eau Claire, WI",,Yes,http://eauxclaires.com/,"[big red machine, collections of colonies of b...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'latin': 1, u'hip_hop': 2, u'indie': 2, u'ho..."
9,KAABOO 2017,0.89501,165,2017-09-15,2017-09-17,"San Diego, CA",$219,No,http://www.kaabooexperience.com/,"[p!nk, tom petty and the heartbreakers, jane’s...",[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"{u'indie': 3, u'house': 9, u'pop': 28, u'rap':..."


In [32]:
distances.to_dict(orient='index')

{0: {u'camping': u' Yes',
  'end_date': Timestamp('2017-06-11 00:00:00'),
  'genres_revised': defaultdict(int,
              {'alternative_rock': 16,
               'ambient': 5,
               'blues': 3,
               'classic_rock': 1,
               'country': 10,
               'dubstep': 3,
               'electronic': 35,
               'experimental': 1,
               'folk': 24,
               'hip_hop': 14,
               'house': 24,
               'indie': 9,
               'jazz': 3,
               'metal': 2,
               'other': 2,
               'pop': 56,
               'r&b': 7,
               'rap': 7,
               'reggae': 1,
               'rock': 25,
               'soul': 5,
               'swing': 1,
               'techno': 1,
               'trap': 21,
               'world': 2,
               'worship': 2}),
  'id': 1,
  u'image': u'https://www.musicfestivalwizard.com/wp-content/uploads/2015/01/Bonnaroo_Logo-300x150.png',
  u'lineup': [u'u2',
   u'lor

In [33]:
festivals.head()

Unnamed: 0,id,name,start_date,end_date,location,tickets,camping,website,description,lineup,url,poster,image,genres,unknown,num_bands,num_unknowns,unknown_percent,genres_revised
0,16,Northside Festival Brooklyn 2017,2017-06-07,2017-06-11,"Brooklyn, NY",$75-$549,No,http://www.northsidefestival.com/,North Brooklyn is transformed into an urban fe...,"[miguel, dirty projectors, downtown boys, elvi...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...,"[[pop, hip_hop, r&b, rap, trap], [dance, alter...",[],31,0,0.0,"{u'jazz': 1, u'hip_hop': 1, u'experimental': 1..."
1,1,Bonnaroo Music Festival 2017,2017-06-08,2017-06-11,"Manchester, TN",$324.50,Yes,http://www.bonnaroo.com/,,"[u2, lorde, travis scott, future islands, tory...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"[[classic_rock, rock, alternative_rock], [pop]...","[bluegrass superjam, case bloom & dj rate (the...",151,7,0.04636,"{u'indie': 9, u'house': 24, u'pop': 56, u'dubs..."
2,3,Blue Ox Music Festival 2017,2017-06-08,2017-06-10,"Eau Claire, WI",,Yes,http://blueoxmusicfestival.com/,,"[pert near sandstone, punch brothers, greensky...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"[[country], [country, pop, folk], [country, fo...","[sam bush band, grateful ball]",29,2,0.06897,"{u'country': 15, u'rock': 7, u'metal': 1, u'po..."
3,18,Disc Jam 2017,2017-06-08,2017-06-11,"Stephentown, NY",,Yes,http://www.discjammusicfestival.com/,,"[dopapod, electron, kung fu, tauk, pink talkin...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...,"[[folk], [folk], [folk], [house, world], [hip_...","[pink talking fish, gubbilidis, leila harrison...",54,6,0.11111,"{u'hip_hop': 2, u'classical': 1, u'house': 1, ..."
4,17,CMA Music Festival 2017,2017-06-08,2017-06-11,"Nashville, TN",,No,http://www.cmaworld.com/cma-music-festival/,,"[a thousand horses, aaron watson, abby anderso...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...,"[[country, other], [country], [country, other]...","[brandon lay, drake white & the big fire, rest...",138,4,0.02899,"{u'alternative_rock': 2, u'indie': 2, u'countr..."


### Artist Recommender

In [34]:
with open('artist_average.pk1', 'rb') as picklefile:
    artist_average = pickle.load(picklefile)

In [35]:
user_artists = ['the weeknd', 'chance the rapper', 'muse']

In [36]:
user_artist_average = defaultdict(dict)
for i, artist in enumerate(user_artists):
    user_artist_average['user' + str(i)] = track_averages(artist)

In [37]:
user_df = pd.DataFrame(user_artist_average).T

In [38]:
artists = pd.DataFrame(artist_average).T
artists = artists.append(user_df)
artists['genres'] = artists['genres'].apply(artist_genre_replace)
artists.tail()

Unnamed: 0,acousticness,danceability,energy,genres,instrumentalness,key,liveness,loudness,speechiness,tempo,valence
ó,0.20832,0.6707,0.6999,"house,pop",0.02001,5.1,0.11609,-6.2669,0.06235,110.5907,0.4645
‪rob zombie,0.00596,0.6048,0.9195,"alternative_rock,metal,rock",0.06666,5.6,0.27454,-4.7254,0.10283,118.6259,0.447
user0,0.17535,0.6563,0.6019,pop,0.0,3.4,0.1709,-7.0208,0.12792,123.814,0.438
user1,0.41616,0.6574,0.5105,",rap",0.0,5.1,0.13641,-7.7691,0.27455,127.9201,0.46
user2,0.01988,0.5405,0.8236,"rock,metal,alternative_rock",0.01522,6.9,0.2087,-5.0495,0.06017,126.9441,0.4376


In [39]:
df_genre = artists['genres'].str.get_dummies(sep=',')
df_genre.head()

Unnamed: 0,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
#familygrind,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
'brink' brinkman,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
(sandy) alex g,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1
070 shake,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10 string symphony,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [40]:
artist_recommend = pd.concat([artists, df_genre], axis=1)

In [41]:
artist_recommend.drop('genres', axis=1, inplace=True)
artist_recommend.fillna(0)
artist_recommend.head()

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
#familygrind,0.14474,0.591,0.6881,0.0,6.6,0.2233,-8.7772,0.3968,84.8542,0.5679,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
'brink' brinkman,0.4659,0.617,0.4112,0.00011,4.9,0.2129,-9.5486,0.03211,110.473,0.6034,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
(sandy) alex g,0.14374,0.4551,0.5978,0.28403,5.2,0.24055,-6.9032,0.05699,132.9916,0.4145,1,1,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,1,0,0,0,0,1,1,1,1,0,0,1,0,0,0,0,0,0,1
070 shake,0.268,0.64743,0.52929,0.0,4.71429,0.21657,-8.55971,0.09321,114.36929,0.3019,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
10 string symphony,0.9573,0.7103,0.17343,0.01511,3.6,0.09985,-11.3705,0.04971,116.1923,0.5246,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [42]:
user_fest = "Firefly Music Festival 2017"
lineup = festivals[festivals.name == user_fest]['lineup']
recommend_df = artist_recommend.loc[list(lineup)[0]]
recommend_df = recommend_df.fillna(0)
recommend_df

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
bob dylan,0.61179,0.4877,0.35543,0.01829,4.7,0.15351,-15.0576,0.04494,138.7041,0.4949,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
chance the rapper,0.41616,0.6574,0.5105,0.0,5.1,0.13641,-7.7691,0.27455,127.9201,0.46,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the weeknd,0.17535,0.6563,0.6019,0.0,3.4,0.1709,-7.0208,0.12792,123.814,0.438,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
twenty one pilots,0.08144,0.6551,0.6396,0.00013,4.7,0.11614,-6.5414,0.06305,114.766,0.54,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
muse,0.01988,0.5405,0.8236,0.01522,6.9,0.2087,-5.0495,0.06017,126.9441,0.4376,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
flume,0.22171,0.5466,0.6061,0.00063,7.1,0.1194,-5.8825,0.11562,111.9679,0.3199,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
weezer,0.02798,0.5517,0.7455,0.03499,5.4,0.13042,-5.8304,0.05187,109.5974,0.54993,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the shins,0.1584,0.5502,0.7153,0.01884,2.8,0.25556,-6.8871,0.03715,125.2453,0.6268,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
thirty seconds to mars,0.028,0.4457,0.8518,0.02992,6.2,0.24206,-4.5112,0.06744,147.6048,0.2156,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
dillon francis,0.04023,0.6848,0.8557,0.08756,4.1,0.26248,-3.5772,0.10561,121.5386,0.5184,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [43]:
user_df = artist_recommend.loc[list(user_df.index)]
user_df = user_df.fillna(0)
user_df

Unnamed: 0,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,speechiness,tempo,valence,alternative_rock,ambient,blues,classic_rock,classical,comedy,country,dance,disco,dubstep,electronic,emo,experimental,folk,funk,hip_hop,house,indie,jazz,latin,metal,other,pop,psychedelic,punk,r&b,rap,reggae,rock,soul,swing,techno,trance,trap,world,worship
user0,0.17535,0.6563,0.6019,0.0,3.4,0.1709,-7.0208,0.12792,123.814,0.438,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
user1,0.41616,0.6574,0.5105,0.0,5.1,0.13641,-7.7691,0.27455,127.9201,0.46,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0
user2,0.01988,0.5405,0.8236,0.01522,6.9,0.2087,-5.0495,0.06017,126.9441,0.4376,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0


In [49]:
artist_distances = pd.DataFrame(list(recommend_df.index), index=list(recommend_df.index), columns=['names'])

for i in range(0, len(user_df)):
    user_case = user_df.ix[i,:]
    distances = pd.DataFrame(
        euclidean_distances(recommend_df, user_case.reshape(1, -1)),
        index = list(recommend_df.index),
        columns=['distance' + str(i)]
    )
    artist_distances = pd.concat([artist_distances, distances], axis=1)

In [50]:
artist_distances['min'] = artist_distances[list(artist_distances.columns[1:])].min(axis=1)

In [51]:
artist_distances.sort_values("min")

Unnamed: 0,names,distance0,distance1,distance2,min
chance the rapper,chance the rapper,4.73281,0.0,3.98867,0.0
the weeknd,the weeknd,0.0,4.73281,5.47965,0.0
muse,muse,5.47965,3.98867,0.0,0.0
vita and the woolf,vita and the woolf,5.89949,1.85818,3.65758,1.85818
alex wiley,alex wiley,2.15128,4.56241,5.25109,2.15128
bishop briggs,bishop briggs,5.27155,2.18754,4.3799,2.18754
salt cathedral,salt cathedral,2.24925,3.46166,4.73516,2.24925
dead man fall,dead man fall,4.22232,2.35691,3.50855,2.35691
sub-radio,sub-radio,5.53796,2.36228,2.51235,2.36228
t-pain,t-pain,2.36423,4.58746,4.9843,2.36423


### Data for D3 visualization

In [376]:
d3 = deepcopy(festivals)

In [377]:
d3.name = d3['name'].apply(lambda x: x[:-5])

In [378]:
d3 = d3.drop([
        'tickets', 
        'camping', 
        'description', 
        'url', 
        'unknown', 
        'num_bands', 
        'num_unknowns', 
        'unknown_percent',
        'genres', 
        'genres_revised',
        'lineup', 
        'image'
    ], axis=1)

In [379]:
d3.head()

Unnamed: 0,name,start_date,end_date,location,website,poster
0,Savannah Music Festival,2017-03-23,2017-04-08,"Savannah, GA",http://www.savannahmusicfestival.org/,[]
1,Ultra Miami,2017-03-24,2017-03-26,"Miami, FL",http://www.ultramusicfestival.com/,[https://www.musicfestivalwizard.com/wp-conten...
2,Winter Wonder Grass Tahoe,2017-03-30,2017-04-02,"Squaw Valley, CA",http://www.winterwondergrasstahoe.com/,[https://www.musicfestivalwizard.com/wp-conten...
3,Desert Hearts,2017-03-31,2017-04-03,"Warner Springs, CA",http://www.deserthearts.us/,[]
4,Fool's Paradise,2017-03-31,2017-04-01,"St. Augustine, FL",http://www.foolsparadisefl.com/,[https://www.musicfestivalwizard.com/wp-conten...


In [None]:
from geopy.geocoders import Nominatim
from time import sleep

geolocator = Nominatim()

def get_lat_long(loc):
    try:
        location = geolocator.geocode(loc)
        return [location.latitude, location.longitude]
    except:
        print "failed {}".format(loc)
        pass

coordinates = []
for location in d3.location:
    coordinates.append(get_lat_long(location))

In [352]:
coordinates[37] = get_lat_long("Atlanta, GA")
coordinates[58] = get_lat_long("Las Vegas, NV")
coordinates[68] = get_lat_long("Toronto, ON")
coordinates[75] = get_lat_long("Nashville, TN")
coordinates[77] = get_lat_long("Bethel, NY")
coordinates[105] = get_lat_long("Quincy, CA")
coordinates[108] = get_lat_long("Quebec City, QC")
coordinates[125] = get_lat_long("Squaw Valley, CA")
coordinates[135] = get_lat_long("Detroit, MI")
coordinates[137] = get_lat_long("Camrose, AB")
coordinates[148] = get_lat_long("Calgary, AB, CA")
coordinates[140] = get_lat_long("Montreal, Canada")
coordinates[98] = get_lat_long("Montebello, Canada")
coordinates[103] = get_lat_long("Montreal, Canada")
coordinates[146] = get_lat_long("Ochoco National Forest, OR")

failed Montebello, QB, CA
failed Montreal, QB, CA
failed Detroit, MI
failed Montreal, QB, CA
failed Big Summit Prairie, OR


In [380]:
d3['coordinates'] = pd.Series(coordinates)

In [382]:
d3.index = list(d3.name)
d3 = d3.drop('name', axis=1)

In [383]:
d3.head()

Unnamed: 0,start_date,end_date,location,website,poster,coordinates
Savannah Music Festival,2017-03-23,2017-04-08,"Savannah, GA",http://www.savannahmusicfestival.org/,[],"[32.0835407, -81.0998341]"
Ultra Miami,2017-03-24,2017-03-26,"Miami, FL",http://www.ultramusicfestival.com/,[https://www.musicfestivalwizard.com/wp-conten...,"[25.7742658, -80.1936588]"
Winter Wonder Grass Tahoe,2017-03-30,2017-04-02,"Squaw Valley, CA",http://www.winterwondergrasstahoe.com/,[https://www.musicfestivalwizard.com/wp-conten...,"[36.7402261, -119.2467849]"
Desert Hearts,2017-03-31,2017-04-03,"Warner Springs, CA",http://www.deserthearts.us/,[],"[33.2822596, -116.6336302]"
Fool's Paradise,2017-03-31,2017-04-01,"St. Augustine, FL",http://www.foolsparadisefl.com/,[https://www.musicfestivalwizard.com/wp-conten...,"[29.8946952, -81.3145394]"


In [411]:
d3_data = defaultdict(list)

for index in d3.index:
    d3_data[index] = [
        d3.loc[index, 'coordinates'][0], 
        d3.loc[index, 'coordinates'][1],
        d3.loc[index, 'start_date'].strftime('%B %d %Y'), 
        d3.loc[index, 'end_date'].strftime('%B %d %Y'), 
        d3.loc[index, 'location'], 
        d3.loc[index, 'website'], 
        d3.loc[index, 'poster']
    ]

In [412]:
d3_data

defaultdict(list,
            {u'4 Peaks Music Festival': [44.0581728,
              -121.3153095,
              'June 15 2017',
              'June 18 2017',
              u'Bend, OR',
              u'http://4peaksmusic.com/',
              [u'https://www.musicfestivalwizard.com/wp-content/uploads/2016/11/4-Peaks-2017-Lineup-oster.jpg']],
             u'Arise Music Festival': [40.3977612,
              -105.07498,
              'August 04 2017',
              'August 07 2017',
              u'Loveland, CO',
              u'http://www.arisefestival.com',
              [u'https://www.musicfestivalwizard.com/wp-content/uploads/2016/09/ARISE-2017-Lineup-Festival.jpg']],
             u'Arroyo Seco Weekend': [34.1476452,
              -118.1444778,
              'June 24 2017',
              'June 25 2017',
              u'Pasadena, Ca',
              u'https://www.arroyosecoweekend.com/',
              [u'https://www.musicfestivalwizard.com/wp-content/uploads/2017/03/Arroyo-Seco-2017-Lineu

In [413]:
import json
with open('map_data.json', 'w') as fp:
    json.dump(d3_data, fp)

u'https://i.scdn.co/image/1c903e1c9fb2ffb682de31ad1f66eb8bc86a2b69'

In [None]:
kmeans = KMeans(n_clusters=8, random_state=0)
kmeans.fit(fest_genres_norm)

In [None]:
pd.Series(kmeans.labels_).value_counts()

In [None]:
fest_genres['clusters'] = kmeans.labels_

In [None]:
single_cluster = fest_genres[fest_genres['clusters'] == 7]
festivals[festivals['name'].isin(list(single_cluster.index))]