In [4]:
import pandas as pd
from datetime import datetime, timedelta
from collections import defaultdict
import spotipy
import json
import pickle

%matplotlib inline

pd.options.display.float_format = '{:20,.2f}'.format
pd.options.display.max_columns = 15
pd.options.display.max_rows = 200

In [5]:
def date_split(dates, position=0):
    dates = dates.split('-')
    if position:
        end_date = dates[1]
        end_date = datetime.strptime(end_date, '%B %d, %Y')
        return end_date
    else:
        start_date = dates[0] + ' ' + dates[1][-4:]
        start_date = datetime.strptime(start_date, '%B %d %Y')
        return start_date
    

def make_lower(lineup):
    lineup_new = []
    for band in lineup:
        band = band.strip()
        band = band.lower()
        lineup_new.append(band)
    return lineup_new
        

In [10]:
# Read in festival data
festivals = pd.read_json("musicfest/pretty_festivals1.json")

# Formatt columns
festivals['start_date'] = festivals.dates.apply(date_split)
festivals['end_date'] = festivals.dates.apply(lambda x: date_split(x, 1))
festivals['duration'] = festivals.end_date - festivals.start_date + timedelta(days=1)
festivals['lineup'] = festivals.lineup.apply(make_lower)


# Reorder columns and sort rows by date 
festivals = festivals[['name', 'start_date', 'end_date', 'location', 'tickets', 'camping', 'website', 'description', 'lineup', 'url', 'poster', 'image']]
festivals.sort_values('start_date', inplace=True)
festivals = festivals.reset_index(drop=True)

# View dataframe head
festivals.head()

Unnamed: 0,name,start_date,end_date,location,tickets,camping,website,description,lineup,url,poster,image
0,Savannah Music Festival 2017,2017-03-23,2017-04-08,"Savannah, GA",,No,http://www.savannahmusicfestival.org/,THE SAVANNAH MUSIC FESTIVAL IS DEDICATED TO PR...,"[the avett brothers, jason isbell, nikki lane,...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...
1,Ultra Miami 2017,2017-03-24,2017-03-26,"Miami, FL",,No,http://www.ultramusicfestival.com/,"FOR EDM FANS AROUND THE WORLD, ULTRA IN MIAMI ...","[chase & status, cypress hill, ice cube, justi...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...
2,Winter Wonder Grass Tahoe 2017,2017-03-30,2017-04-02,"Squaw Valley, CA",,No,http://www.winterwondergrasstahoe.com/,"Lively national, regional and local bluegrass ...","[greensky bluegrass, yonder mountain string ba...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...
3,Desert Hearts 2017,2017-03-31,2017-04-03,"Warner Springs, CA",,Yes,http://www.deserthearts.us/,Desert Hearts is a bi-annual music and arts fe...,"[ardalan, atish, ben seagren, christian martin...",https://www.musicfestivalwizard.com/festivals/...,[],https://www.musicfestivalwizard.com/wp-content...
4,Fool's Paradise 2017,2017-03-31,2017-04-01,"St. Augustine, FL",$65-$250,Yes,http://www.foolsparadisefl.com/,Fool's Paradise is bringing your favorite band...,"[lettuce, dumpstaphunk, the floozies, joe russ...",https://www.musicfestivalwizard.com/festivals/...,[https://www.musicfestivalwizard.com/wp-conten...,https://www.musicfestivalwizard.com/wp-content...


In [7]:
# Initialize Spotify API package
spotify = spotipy.Spotify()

# Dictionary with artist info
band_info = defaultdict(dict)

for lineup in festivals.lineup:
    for band in lineup:
        try:
            search_result = spotify.search(q='artist:' + band, type='artist')
            band_info[band] = search_result['artists']['items'][0]
        except:
            next

retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs
retrying ...0secs


In [10]:
# pickel for later
with open('band_info.pk1', 'wb') as picklefile:
    pickle.dump(band_info, picklefile)