In [1]:
import os
import requests
import csv
from collections import OrderedDict
import pandas as pd
from time import sleep
import numpy as np

In [2]:
# Load the API key from a local text file (kept out of the notebook itself).
# A context manager is used so the file handle is closed deterministically.
with open("ticketmasterkey.txt") as key_file:
    api_key = key_file.read().strip()

# Base URL that get_req prefixes to every request path.
req_root = "https://app.ticketmaster.com/discovery/v2/"

In [59]:
def get_req(req_type="events", params=None):
    """Build the full request URL for one API call.

    Parameters
    ----------
    req_type : str
        Resource name placed before ``.json`` in the path.
    params : dict, optional
        Extra query parameters. They are emitted first, in iteration order,
        followed by the fixed ``size``, ``sort`` and ``apikey`` parameters.

    Returns
    -------
    str
        ``req_root`` + ``req_type`` + ``.json?`` + the encoded query string.

    Notes
    -----
    Keys and values are percent-encoded, so values containing spaces, ``&``
    or ``=`` (e.g. city names such as "New York") no longer corrupt the
    query string. ``,`` and ``:`` are left as-is so the sort expression and
    ISO timestamps read exactly as before.
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote, urlencode

    query = list((params or {}).items())
    query += [
        ("size", 200),
        ("sort", "onSaleStartDate,asc"),
        ("apikey", api_key),
    ]
    query_str = urlencode(query, safe=",:", quote_via=quote)
    return "{}{}.json?{}".format(req_root, req_type, query_str)

In [12]:
def get_attrs(row):
    """Flatten one event record into a single-row DataFrame.

    Parameters
    ----------
    row : dict
        One event dict (an element of ``_embedded.events`` in the JSON that
        ``do_all`` downloads).

    Returns
    -------
    pandas.DataFrame
        Exactly one row (index ``0``, object dtype) with a fixed column set:
        the scalar event fields, ``attraction_0..4`` and
        ``presale_N`` / ``presaleStart_N`` / ``presaleEnd_N`` for N in 0..14.
        Fields absent from ``row`` are left as NaN/None.

    Notes
    -----
    * Prices are read from the ``priceRanges`` key (the previous check for
      ``priceRange`` never matched, so prices were always empty).
    * Only the first classification, price range and venue are flattened;
      the ``num*`` columns record how many there were in total.
    * Attractions and presales beyond the fixed column counts are dropped,
      but ``numPresales`` still reports the full count.
    * Missing optional sections (``_embedded``, ``dates``, ``sales``,
      ``venues``) or empty lists leave their columns empty instead of raising.
    """
    DEFAULT_NUM_ATTRACTIONS = 5
    DEFAULT_NUM_PRESALES = 15

    columns = ['numPresales','id','name','date','time','saleStart','saleEnd','numClassifications',
        'segment','genre','subGenre','numPriceRanges','minPrice','maxPrice','numVenues',
        'venueName','city','state','country']
    columns += ["attraction_{}".format(num) for num in range(DEFAULT_NUM_ATTRACTIONS)]
    for num in range(DEFAULT_NUM_PRESALES):
        columns += ["presale_{}".format(num),
                    "presaleStart_{}".format(num),
                    "presaleEnd_{}".format(num)]

    # Collect values in a plain dict and build the frame once at the end;
    # chained ``final.iloc[0][col] = value`` assignment is not guaranteed to
    # write through to the frame.
    record = {}

    embedded = row.get("_embedded") or {}
    start = (row.get("dates") or {}).get("start") or {}

    record["id"] = row.get("id")
    record["name"] = row.get("name")
    record["date"] = start.get("localDate")
    record["time"] = start.get("localTime")

    # Attractions
    attractions = embedded.get("attractions") or []
    for num, attraction in enumerate(attractions[:DEFAULT_NUM_ATTRACTIONS]):
        record["attraction_{}".format(num)] = attraction.get("name")

    # Sales
    sales = row.get("sales") or {}
    public = sales.get("public") or {}
    record["saleStart"] = public.get("startDateTime")
    record["saleEnd"] = public.get("endDateTime")

    if "presales" in sales:
        presales = sales["presales"] or []
        record["numPresales"] = len(presales)
        for num, presale in enumerate(presales[:DEFAULT_NUM_PRESALES]):
            record["presaleStart_{}".format(num)] = presale.get("startDateTime")
            record["presaleEnd_{}".format(num)] = presale.get("endDateTime")
            record["presale_{}".format(num)] = presale.get("name")

    # Classifications
    if "classifications" in row:
        classifications = row["classifications"] or []
        record["numClassifications"] = len(classifications)
        if classifications:
            first = classifications[0]
            for key in ("segment", "genre", "subGenre"):
                if key in first:
                    record[key] = first[key].get("name")

    # Prices
    if "priceRanges" in row:
        price_ranges = row["priceRanges"] or []
        record["numPriceRanges"] = len(price_ranges)
        if price_ranges:
            record["minPrice"] = price_ranges[0].get("min")
            record["maxPrice"] = price_ranges[0].get("max")

    # Venues
    if "venues" in embedded:
        venues = embedded["venues"] or []
        record["numVenues"] = len(venues)
        if venues:
            venue = venues[0]
            record["venueName"] = venue.get("name")
            if "city" in venue:
                record["city"] = venue["city"].get("name")
            if "state" in venue:
                record["state"] = venue["state"].get("stateCode")
            if "country" in venue:
                record["country"] = venue["country"].get("countryCode")

    # ``columns`` fixes both the column order and the set of columns kept;
    # keys missing from ``record`` become NaN.
    return pd.DataFrame([record], columns=columns, dtype=object)

In [15]:
def do_all(params=None, filename="sales.csv", num_pages=5, verbose=False):
    """Download event pages and append previously unseen events to a CSV.

    Parameters
    ----------
    params : dict, optional
        Query parameters forwarded to ``get_req``; a ``page`` entry is added
        for each request.
    filename : str
        CSV file to append to. If it already exists and is non-empty, the
        ids stored in its ``id`` column are loaded and skipped.
    num_pages : int
        Maximum number of pages to request.
    verbose : bool
        Print page progress and a completion message.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status (previously this surfaced
        later as a ``KeyError`` on the missing ``page`` entry).
    """
    PAGE_SIZE = 200  # must match the ``size`` that get_req puts in the URL

    params = dict(params or {})

    # A missing or zero-byte file has no header row yet.
    write_headers = not os.path.exists(filename) or os.path.getsize(filename) == 0
    if write_headers:
        ids = set()
    else:
        # Read ids as strings so they compare equal to the ids in the JSON.
        existing = pd.read_csv(filename, index_col=0, dtype={"id": str})
        ids = set(existing["id"])

    # newline='' lets pandas control line endings when given a file handle.
    with open(filename, 'a', newline='') as f:
        for num in range(num_pages):
            if verbose:
                print("Page number: {} / {}".format(num, num_pages),end="\r")
            req = get_req(params={**params, "page": num})
            response = requests.get(req)
            response.raise_for_status()
            data = response.json()

            rows = data.get("_embedded", {}).get("events") or []
            for row in rows:
                attrs = get_attrs(row)
                ID = attrs["id"].iloc[0]
                if ID not in ids:
                    attrs.to_csv(f, header=write_headers)
                    write_headers = False
                    ids.add(ID)

            # Stop once the pages fetched so far cover every matching event
            # (``<=`` avoids one extra empty request on exact multiples).
            total = data.get("page", {}).get("totalElements", 0)
            if total <= (num + 1) * PAGE_SIZE:
                break
    if verbose:
        print("Complete {}\nSaved to {}".format(params, filename))

## Cities and States

In [5]:
# Two-letter state codes, 50 entries, ten per row.
states = (
    "AL AK AZ AR CA CO CT DE FL GA "
    "HI ID IL IN IA KS KY LA ME MD "
    "MA MI MN MS MO MT NE NV NH NJ "
    "NM NY NC ND OH OK OR PA RI SC "
    "SD TN TX UT VT VA WA WV WI WY"
).split()

In [64]:
# City names used as the `city` query value, one API sweep per entry.
# The list is de-duplicated (first occurrence wins, order preserved) because
# the query is by name only, so a repeated name just repeats the same request.
# 'Winston-Salem' uses a plain ASCII hyphen (it was previously an en dash).
cities = list(OrderedDict.fromkeys([
    'New York','Los Angeles','Chicago','Houston','Phoenix',
    'Philadelphia','San Antonio','San Diego','Dallas','San Jose',
    'Austin','Jacksonville','San Francisco','Columbus','Indianapolis',
    'Fort Worth','Charlotte','Seattle','Denver','El Paso',
    'Washington','Boston','Detroit','Nashville','Memphis',
    'Portland','Oklahoma City','Las Vegas','Louisville','Baltimore',
    'Milwaukee','Albuquerque','Tucson','Fresno','Sacramento','Mesa',
    'Kansas City','Atlanta','Long Beach','Colorado Springs','Raleigh',
    'Miami','Virginia Beach','Omaha','Oakland','Minneapolis','Tulsa',
    'Arlington','New Orleans','Wichita','Cleveland','Tampa',
    'Bakersfield','Aurora','Honolulu','Anaheim','Santa Ana',
    'Corpus Christi','Riverside','Lexington','St. Louis','Stockton',
    'Pittsburgh','Saint Paul','Cincinnati','Anchorage','Henderson',
    'Greensboro','Plano','Newark','Lincoln','Toledo','Orlando','Chula Vista',
    'Irvine','Fort Wayne','Jersey City','Durham','St. Petersburg',
    'Laredo','Buffalo','Madison','Lubbock','Chandler','Scottsdale',
    'Glendale','Reno','Norfolk','Winston-Salem','North Las Vegas',
    'Irving','Chesapeake','Gilbert','Hialeah','Garland','Fremont',
    'Baton Rouge','Richmond','Boise','San Bernardino','Spokane',
    'Des Moines','Modesto','Birmingham','Tacoma','Fontana','Rochester',
    'Oxnard','Moreno Valley','Fayetteville','Aurora','Glendale','Yonkers',
    'Huntington Beach','Montgomery','Amarillo','Little Rock',
    'Akron','Columbus','Augusta','Grand Rapids','Shreveport',
    'Salt Lake City','Huntsville','Mobile','Tallahassee',
    'Grand Prairie','Overland Park','Knoxville','Port St. Lucie',
    'Worcester','Brownsville','Tempe','Santa Clarita','Newport News',
    'Cape Coral','Providence','Fort Lauderdale','Chattanooga','Rancho Cucamonga',
    'Oceanside','Santa Rosa','Garden Grove','Vancouver','Sioux Falls',
    'Ontario','McKinney','Elk Grove','Jackson','Pembroke Pines',
    'Salem','Springfield','Corona','Eugene','Fort Collins','Peoria',
    'Frisco','Cary','Lancaster','Hayward','Palmdale','Salinas','Alexandria',
    'Lakewood','Springfield','Pasadena','Sunnyvale','Macon','Pomona',
    'Hollywood','Kansas City','Escondido','Clarksville','Joliet','Rockford',
    'Torrance','Naperville','Paterson','Savannah','Bridgeport','Mesquite',
    'Killeen','Syracuse','McAllen','Pasadena','Bellevue','Fullerton',
    'Orange','Dayton','Miramar','Thornton','West Valley City','Olathe',
    'Hampton','Warren','Midland','Waco','Charleston','Columbia','Denton',
]))

In [75]:
# Lower bounds for the on-sale filter: a full timestamp and a date-only form.
on_sale_start_date_time = "2017-10-10T00:00:00Z"
on_sale_start_date = "2017-10-10"

# Sweep every city, appending new events to one shared CSV.
for city in cities:
    do_all(params={"onsaleOnAfterStartDate":on_sale_start_date,"city":city}, 
           filename="sales_20171009.csv")
    # Progress marker: print the city just finished (the loop variable is
    # `city`; `state` is not defined by this loop).
    print("{}".format(city), end=" ")
    # Random pause of up to one second between cities to space out requests.
    sleep(np.random.random())

In [76]:
# Rewrite the combined sales CSV in place with its start/end columns parsed as datetimes.
fix_dates(filename="sales_20171009.csv")

In [45]:
def fix_dates(filename):
    """Parse the start/end columns of a CSV as datetimes and rewrite the file.

    Every column whose name contains ``start`` or ``end`` (case-insensitive)
    is passed through ``pd.to_datetime``; the result overwrites ``filename``.

    Parameters
    ----------
    filename : str
        Path of the CSV to read and overwrite. The first column is used as
        the index on read and written back as the index.
    """
    # pd.DataFrame.from_csv no longer exists in current pandas; read_csv with
    # index_col=0 loads the same layout.
    df = pd.read_csv(filename, index_col=0)
    date_cols = [col for col in df.columns
                 if 'start' in col.lower() or 'end' in col.lower()]
    for date_col in date_cols:
        df[date_col] = pd.to_datetime(df[date_col])
    df.to_csv(filename)

In [30]:
# Write the in-memory frame `df` over the combined sales CSV.
# NOTE(review): `df` is not assigned in any visible cell — presumably left
# over from an earlier kernel session; confirm its origin before re-running.
df.to_csv("sales_20171009.csv")

In [36]:
import datetime as dt

# Keep the rows whose presaleStart_0 is later than the current local time.
presales = df.loc[df["presaleStart_0"] > dt.datetime.now()]

In [70]:
# Export of the presales frame is currently disabled:
# presales.to_csv("presales.csv")
# Re-assign the date-only on-sale lower bound used by the queries below.
on_sale_start_date = "2017-10-10"

'2017-10-09'

In [73]:
# One-off pull for a single city: request at most two pages into a separate
# CSV, then rewrite that file with its start/end columns parsed as datetimes.
do_all(
    params={
        "onsaleOnAfterStartDate": on_sale_start_date,
        "city": 'arlington',
    },
    filename="arlington_sales.csv",
    num_pages=2,
)
fix_dates("arlington_sales.csv")

In [68]:
# Show the request URL that would be sent for the Arlington query.
get_req(
    params={
        "onsaleOnAfterStartDate": on_sale_start_date_time,
        "city": 'arlington',
    }
)

'https://app.ticketmaster.com/discovery/v2/events.json?onSaleStartDateTime=2017-10-10T00:00:00Z&city=arlington&size=200&sort=onSaleStartDate,asc&apikey=<REDACTED>'

In [63]:
# Echo the current value of on_sale_start_date_time as the cell output.
on_sale_start_date_time


'2017-10-10T00:00:00Z'