In [3]:
#import the API scraping packages
import requests
from requests.exceptions import HTTPError

#Set the API URL
# NOTE(review): the API key is embedded in this URL. Consider loading it from
# an environment variable before sharing or committing this notebook.
url = 'https://app.ticketmaster.com/discovery/v2/venues?apikey=7elxdku9GGG5k8j0Xm8KWdANDgecHMV0&unit=miles&locale=*&size=500&page=1'

#Connect to the API once. If it fails, print a helpful error and re-raise so
#that nothing below runs against a missing or failed response.
try:
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
except HTTPError as http_err:
    print('HTTP error occurred: ' + str(http_err))
    raise
except Exception as err:
    print('Other error occurred: ' + str(err))
    raise

#Read all API data from the response already received (no second request)
source = response.json()

#Rawest output of all API data
print(source)



So what we have above is a bunch of embedded dictionaries (things that map to other things). We have to dig through these to find the data we want.

In [4]:
#List the top-level keys of the API response, one per line
for top_level_key in source.keys():
    print(top_level_key)

_links
_embedded
page


Thus, the response has 3 top-level keys, each mapping to its own dictionary. Here's the raw data for each:

In [18]:
#Raw contents stored under the '_links' key
links_section = source['_links']
print(links_section)

{u'next': {u'href': u'/discovery/v2/venues?unit=miles&locale=*&page=2&size=500'}, u'self': {u'href': u'/discovery/v2/venues?unit=miles&size=500&page=1&locale=*'}, u'prev': {u'href': u'/discovery/v2/venues?unit=miles&locale=*&page=0&size=500'}, u'last': {u'href': u'/discovery/v2/venues?unit=miles&locale=*&page=235&size=500'}, u'first': {u'href': u'/discovery/v2/venues?unit=miles&locale=*&page=0&size=500'}}


In [19]:
#Raw contents stored under the '_embedded' key
embedded_section = source['_embedded']
print(embedded_section)



In [20]:
#Raw contents stored under the 'page' key
page_section = source['page']
print(page_section)

{u'totalPages': 236, u'totalElements': 117819, u'number': 1, u'size': 500}


'_embedded' has the information we want

In [21]:
#List the keys found inside the '_embedded' dictionary
embedded_section = source['_embedded']
for embedded_key in embedded_section.keys():
    print(embedded_key)

venues


There's only 1 key, 'venues', in the '_embedded' dictionary.

In [32]:
#Print every item stored under '_embedded' -> 'venues'.
#The names a and b are kept because later cells read b directly.
a = source['_embedded']
b = a['venues']
for venue_entry in b:
    print(venue_entry)

{u'city': {u'name': u'Crystal Bay'}, u'postalCode': u'89402', u'name': u'Crystal Bay Club Casino Crown Room', u'dmas': [{u'id': 250}, {u'id': 273}, {u'id': 282}, {u'id': 341}, {u'id': 368}, {u'id': 374}, {u'id': 382}], u'url': u'https://www.ticketmaster.com/crystal-bay-club-casino-crown-room-tickets-crystal-bay/venue/189416', u'country': {u'name': u'United States Of America', u'countryCode': u'US'}, u'accessibleSeatingDetail': u'Any requests for handicap accommodation will need to be addressed by contacting the Director of Security at the Crystal Bay Casino a minimum of 24 hours in advance of the show you plan on attending.', u'generalInfo': {u'generalRule': u'All events 21+'}, u'markets': [{u'name': u'N. California/N. Nevada', u'id': u'41'}], u'locale': u'en-us', u'state': {u'name': u'Nevada', u'stateCode': u'NV'}, u'_links': {u'self': {u'href': u'/discovery/v2/venues/KovZpZAEJIaA?locale=en-us'}}, u'location': {u'latitude': u'39.227664', u'longitude': u'-120.004578'}, u'address': {u'l

'venues' has a bunch of stuff, but it looks closer to the final data we want. I tried to go deeper by key, but 'venues' is a list rather than a dictionary, so the lookup fails:

In [33]:
a = source['_embedded']
b = a['venues']
#'venues' is a list, so indexing it with a string key raises TypeError.
#The error is the point of this cell, so catch it and print it; that way the
#notebook still runs from top to bottom.
try:
    c = b['city']
    for d in c:
        print(d)
except TypeError as err:
    print('TypeError: ' + str(err))

TypeError: list indices must be integers, not str

So, now we can pull elements from this layer. This is the first element of b = a['venues'] = source['_embedded']['venues']

In [38]:
#First item of the 'venues' list
first_venue = b[0]
print(first_venue)

{u'city': {u'name': u'Crystal Bay'}, u'postalCode': u'89402', u'name': u'Crystal Bay Club Casino Crown Room', u'dmas': [{u'id': 250}, {u'id': 273}, {u'id': 282}, {u'id': 341}, {u'id': 368}, {u'id': 374}, {u'id': 382}], u'url': u'https://www.ticketmaster.com/crystal-bay-club-casino-crown-room-tickets-crystal-bay/venue/189416', u'country': {u'name': u'United States Of America', u'countryCode': u'US'}, u'accessibleSeatingDetail': u'Any requests for handicap accommodation will need to be addressed by contacting the Director of Security at the Crystal Bay Casino a minimum of 24 hours in advance of the show you plan on attending.', u'generalInfo': {u'generalRule': u'All events 21+'}, u'markets': [{u'name': u'N. California/N. Nevada', u'id': u'41'}], u'locale': u'en-us', u'state': {u'name': u'Nevada', u'stateCode': u'NV'}, u'_links': {u'self': {u'href': u'/discovery/v2/venues/KovZpZAEJIaA?locale=en-us'}}, u'location': {u'latitude': u'39.227664', u'longitude': u'-120.004578'}, u'address': {u'l

And guess what? Each of these elements is itself a dictionary! If we take just the first element, b[0], and list its keys:

In [40]:
#List the keys of the first venue, one per line
for venue_key in b[0].keys():
    print(venue_key)

city
postalCode
name
dmas
url
country
accessibleSeatingDetail
generalInfo
markets
locale
state
_links
location
address
test
timezone
boxOfficeInfo
upcomingEvents
type
id


We find the above categories.

In [43]:
#Look at the 'city' entry of the first venue.
#c is kept as a name because the next cell reads it.
c = b[0]
city_entry = c['city']
print(city_entry)

{u'name': u'Crystal Bay'}


Now, we can pull the city name by referencing the "name" key inside the "city" dictionary.

In [44]:
#Drill one level further to reach the city name itself
d = c['city']
city_name = d['name']
print(city_name)

Crystal Bay


Here are all of the city names:

In [46]:
#Print the city name of every venue in the response
for venue in source['_embedded']['venues']:
    print(venue['city']['name'])

Crystal Bay
Grand Junction
Bristol
Las Vegas
Charlotte
Minneapolis
Detroit
Tupelo
Evansville
Houston
Columbus
Rochester
Sacramento
Dodge City
Atlantic City
Portland
Jacksonville
Boston
Fort Worth
Minneapolis
Duluth
Chicago
Columbia
Columbus
Las Vegas
Jonesboro
Portland
Knoxville
Los Angeles
Farmingville
Las Vegas
Charleston
Milwaukee
Dallas
Providence
Syracuse
El Cajon
Kalamazoo
San Antonio
Ponte Vedra Beach
Philadelphia
Montclair
Charleston
Poughkeepsie
Primm
Terre Haute
Reading
Binghamton
Norfolk
Atlantic City
Ft Lauderdale
Prescott Valley
Grand Junction
Robinsonville
Portland
Indianapolis
Manchester
Thousand Oaks
Cleveland
Saint Louis
Mashantucket
Riverside
Warren
Beverly
Orlando
Milwaukee
Boise
Columbus
Broomfield
Syracuse
Evansville
Shakopee
Pikeville
New Orleans
Southaven
Miramar
Toledo
Phoenix
Daytona Beach
Council Bluffs
Reading
Florence
Napa
Baltimore
Baltimore
Savannah
Wallingford
Rochester
Dover
Pearl
Las Vegas
Jackson
Saint Charles
Akron
Fresno
Portland
Little Rock
Milwauke

Everything else can be found similarly. But, the exact pull might not be the same since some categories won't have sub-dictionaries. Ex: postalCode doesn't need as many layers as city.

In [47]:
#Print the postal code of every venue in the response
for venue in source['_embedded']['venues']:
    print(venue['postalCode'])

89402
81501
37620
89135
28206
55414
48226
38804
47708
77002
43201
14604
95815
67801
08401
04101
32202
02134
76107
55403
55802
60601
29208
31901
89109
72401
04101
37915
90015
11738
89109
29492
53203
75201
02903
13202
92020
49002
78219
32082
19123
91764
25301
12601
89019
47809
19601
13901
23514
08401
33304
86314
81501
38664
97227
46208
03101
91362
44115
63112
06355
92501
44483
01915
32808
53202
83712
43215
80021
13202
47708
55379
41501
70122
38672
33027
43609
85003
32118
51501
19602
29501
94559
21250
21201
31401
06492
48309
19901
39288
89103
39202
63303
44325
93721
04102
72201
53203
72201
01952
78577
19082
55904
16802
18701
48226
33306
30303
89109
31201
25701
85305
91301
89109
13202
08401
55802
47432
85239
60601
70130
85256
35203
33312
94133
92082
89449
34952
04401
95110
89505
98109
58104
12720
91362
70130
90802
75050
77705
14614
10036
85226
46320
99501
92626
53703
16802
97403
20001
50011
58102
13478
57107
33675
46802
80202
66603
94928
31217
89109
89512
85701
70813
55337
29456
66612
1710

Once you know the right pull for each category, edit the for loop in the code below; it will then dump the data into a dataframe.

In [86]:
#import Pandas for dataframes
import pandas as pd 

#Value stored whenever a venue does not carry the requested field
MISSING = 'NaN'


def _pull(record, *path):
    """Follow `path` (dict keys and/or list positions) down into `record`.

    Returns the value found at the end of the path, or MISSING as soon as a
    step cannot be taken (absent key, empty list, or a value that cannot be
    indexed that way). Only lookup failures are handled; any other error
    still propagates.
    """
    current = record
    for step in path:
        try:
            current = current[step]
        except (KeyError, IndexError, TypeError):
            return MISSING
    return current


#Create a bunch of lists to dump the API data into
city_list = []
postalCode_list = []
name_list = []
dmas_list = []
url_list = []
country_list = []
accessibleSeatingDetail_list = []
generalInfo_list = []
markets_list = []
locale_list = []
state_list = []
_links_list = []
location_list = []
address_list = []
test_list = []
timezone_list = []
boxOfficeInfo_list = []
upcomingEvents_list = []
type_list = []
id_list = []

#The names a and b are kept because other cells in this notebook read them.
a = source['_embedded']
b = a['venues']
for venue in b:
    # Every field goes through _pull, so a venue that lacks a field gets
    # MISSING in that column instead of aborting the whole cell.
    city_list.append(_pull(venue, 'city', 'name'))
    postalCode_list.append(_pull(venue, 'postalCode'))
    name_list.append(_pull(venue, 'name'))
    # Only the first entry of the 'dmas' and 'markets' lists is kept.
    dmas_list.append(_pull(venue, 'dmas', 0))
    url_list.append(_pull(venue, 'url'))
    country_list.append(_pull(venue, 'country', 'countryCode'))
    accessibleSeatingDetail_list.append(_pull(venue, 'accessibleSeatingDetail'))
    generalInfo_list.append(_pull(venue, 'generalInfo', 'generalRule'))
    markets_list.append(_pull(venue, 'markets', 0))
    locale_list.append(_pull(venue, 'locale'))
    state_list.append(_pull(venue, 'state', 'stateCode'))
    _links_list.append(_pull(venue, '_links', 'self', 'href'))

    # Join latitude and longitude with a comma. Gluing them together with no
    # separator cannot be split back apart when the longitude is positive.
    latitude = _pull(venue, 'location', 'latitude')
    longitude = _pull(venue, 'location', 'longitude')
    if MISSING in (latitude, longitude):
        location_list.append(MISSING)
    else:
        location_list.append(str(latitude) + ',' + str(longitude))

    address_list.append(_pull(venue, 'address', 'line1'))
    test_list.append(_pull(venue, 'test'))
    timezone_list.append(_pull(venue, 'timezone'))
    boxOfficeInfo_list.append(_pull(venue, 'boxOfficeInfo', 'phoneNumberDetail'))
    upcomingEvents_list.append(_pull(venue, 'upcomingEvents', 'total'))
    type_list.append(_pull(venue, 'type'))
    id_list.append(_pull(venue, 'id'))

#Column labels are unchanged (including the '_list' suffix on the last three)
#so that any existing reference to them keeps working.
df = pd.DataFrame(list(zip(city_list,postalCode_list,name_list,dmas_list,url_list,country_list,accessibleSeatingDetail_list,
                                generalInfo_list,markets_list,locale_list,state_list,_links_list,location_list,address_list,
                                test_list,timezone_list,boxOfficeInfo_list,upcomingEvents_list,type_list,id_list)),
              columns=['city','postalCode','name','dmas','url','country','accessibleSeatingDetail','generalInfo','markets'
                      ,'locale','state','_links','location','address','test','timezone','boxOfficeInfo','upcomingEvents_list',
                      'type_list','id_list'])

print(df.head(5))

             city postalCode  \
0     Crystal Bay      89402   
1  Grand Junction      81501   
2         Bristol      37620   
3       Las Vegas      89135   
4       Charlotte      28206   

                                                name          dmas  \
0                 Crystal Bay Club Casino Crown Room  {u'id': 250}   
1                                     Avalon Theatre  {u'id': 264}   
2  Thunder Valley Amphitheatre pres. by Ballad He...  {u'id': 246}   
3        The Sandbar at Red Rock Casino Resort & Spa  {u'id': 319}   
4                                    The Underground  {u'id': 245}   

                                                 url country  \
0  https://www.ticketmaster.com/crystal-bay-club-...      US   
1  https://www.ticketmaster.com/avalon-theatre-ti...      US   
2  https://www.ticketmaster.com/thunder-valley-am...      US   
3  https://www.ticketmaster.com/the-sandbar-at-re...      US   
4  https://www.ticketmaster.com/the-underground-t...      US   

 