In [119]:
import json
import os

import docx
import numpy as np
import pandas as pd
import requests
from IPython.core.interactiveshell import InteractiveShell

InteractiveShell.ast_node_interactivity = "all"

# Ticketmaster Discovery API settings.
# NOTE(review): the fallback key below is committed in source. Set the
# TICKETMASTER_API_KEY environment variable, then rotate the key and remove
# the literal fallback.
TICKETMASTER_EVENTS_URL = 'https://app.ticketmaster.com/discovery/v2/events.json'
TICKETMASTER_API_KEY = os.environ.get('TICKETMASTER_API_KEY', 'Lqseo1SMwgdKqLEKOPTLlfJaLSCMtd0S')
REQUEST_TIMEOUT_SECONDS = 30


def fetch_venue_events(venue_id, api_key=TICKETMASTER_API_KEY, timeout=REQUEST_TIMEOUT_SECONDS):
    """Request the US events of one venue and return the decoded JSON body.

    Parameters
    ----------
    venue_id : str
        Value sent as the ``venueId`` query parameter.
    api_key : str
        Value sent as the ``apikey`` query parameter.
    timeout : float
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The parsed JSON response.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status.
    """
    # NOTE(review): only a single request is made; if the API paginates its
    # results, events beyond the first page are not fetched -- confirm.
    response = requests.get(
        TICKETMASTER_EVENTS_URL,
        params={'venueId': venue_id, 'countryCode': 'US', 'apikey': api_key},
        timeout=timeout,
    )
    print(response)
    # Fail here with a clear HTTP error instead of later on a missing key.
    response.raise_for_status()
    return response.json()


# Dpac
parsed_dpac = fetch_venue_events('KovZpa2X8e')

# Carolina Theatre
parsed_carolina_theater = fetch_venue_events('KovZpZAFAl6A')



<Response [200]>
<Response [200]>


# GET EVENT INFO

In [120]:
def _embedded_events(parsed_response):
    """Return a new list with the events under ``_embedded.events`` (empty if absent)."""
    embedded = parsed_response.get('_embedded') or {}
    return list(embedded.get('events') or [])


# Get the events of both venues.
# A new list is built instead of extending the list stored inside parsed_dpac,
# so re-running this cell does not append the Carolina Theatre events again.
list_of_events = _embedded_events(parsed_dpac) + _embedded_events(parsed_carolina_theater)

# Put all the events in a dataframe (one event dictionary per row)
events = pd.DataFrame({'events': list_of_events})

# Break the dictionaries into columns
# Now we have 1 column for each of sales, locale, dates, etc
iteration_1 = events['events'].apply(pd.Series)

In [121]:
# list_of_events

In [122]:
# Collect the name and the id of every event, then place both in a dataframe
list_of_event_names = list(map(lambda single_event: single_event.get('name'), list_of_events))
list_of_event_ids = list(map(lambda single_event: single_event.get('id'), list_of_events))

event_names_df = pd.DataFrame(
    {
        'event name': list_of_event_names,
        'event id': list_of_event_ids,
    }
)

## SALES

In [123]:
pd.set_option('display.max_colwidth',500)

# Break the sales column into columns
# We now have public sales and (when at least one event has them) presales
sales = iteration_1['sales'].apply(pd.Series)

# Break the public sales into columns and prefix them to distinguish them from the rest
sales_public = sales['public'].apply(pd.Series).add_prefix('public sales ')

# The presales column only exists when at least one event has presales.
if 'presales' in sales.columns:
    presales_per_event = sales['presales']
else:
    presales_per_event = pd.Series(np.nan, index=sales.index, dtype=object)

# Build one record (dict) per event and create the dataframe once at the end,
# instead of growing a dataframe with concat inside the loop.
presale_records = []
for presales_list_element in presales_per_event:
    presale_record = {}

    # Events without presales hold NaN instead of a list; they get an empty
    # record, which becomes a row of NaN and keeps the rows aligned with the events.
    if isinstance(presales_list_element, (list, tuple)):
        for presale_dict in presales_list_element:
            presale_name = presale_dict['name']
            # The presale name goes into the column label so the start and end
            # of each presale can be told apart. The labels use the same explicit
            # keys as the values, so they cannot disagree if the key order changes.
            # setdefault keeps the first entry when one event repeats a presale name.
            presale_record.setdefault(presale_name + ' ' + 'startDateTime', presale_dict.get('startDateTime'))
            presale_record.setdefault(presale_name + ' ' + 'endDateTime', presale_dict.get('endDateTime'))

    presale_records.append(presale_record)

# One row per event with a default 0..n-1 index, ready to be merged column-wise.
presale_final = pd.DataFrame(presale_records)

# DATES

In [124]:
pd.set_option('display.max_colwidth',500)

# Break the dates column into columns
# There are a lot of dates trapped into dictionaries. Use pd.Series multiple times to untangle everything
dates = iteration_1['dates'].apply(pd.Series)

# Prefix the start-date columns to distinguish them from the other dates
start_dates = dates['start'].apply(pd.Series).add_prefix('event start ')
status = dates['status'].apply(pd.Series)

# initialStartDate is not guaranteed to be present in this pull; fall back to
# an empty frame so the three output columns are still produced (as NaN).
if 'initialStartDate' in dates.columns:
    initial_start_date = dates['initialStartDate'].apply(pd.Series)
else:
    initial_start_date = pd.DataFrame(index=dates.index)

# Keep only the expected columns: reindex drops the stray columns created by
# pd.Series for NaN rows and adds any expected column that is missing.
initial_start_date = (
    initial_start_date
    .reindex(columns=['dateTime', 'localDate', 'localTime'])
    .add_prefix('event initial start ')
)

# CLASSIFICATIONS

In [125]:
pd.set_option('display.max_colwidth',500)

# Take the first classification of every event.
# Events without classifications hold NaN instead of a list; they get an empty
# dictionary so they become a row of NaN instead of raising.
list_of_classification_dictionaries = [
    entry[0] if isinstance(entry, (list, tuple)) and len(entry) > 0 else {}
    for entry in iteration_1['classifications']
]

classification_df = pd.DataFrame({'classification_dictionaries':list_of_classification_dictionaries})
# One column per classification key (primary, family and the nested dictionaries)
classification_df_broken_in_columns = pd.DataFrame(
    list_of_classification_dictionaries, index=classification_df.index
)

# reindex yields NaN columns instead of raising when a flag is absent
classification_df_primary_family = classification_df_broken_in_columns.reindex(columns=['primary', 'family'])
columns_of_interest = [
    column for column in classification_df_broken_in_columns.columns
    if column not in ('primary', 'family')
]


def _classification_name(value):
    """Return the 'name' entry of a classification dictionary; pass other values through."""
    return value.get('name') if isinstance(value, dict) else value


# Keep only the name of each nested dictionary, stored under the column's own label.
# Reading 'name' explicitly does not depend on the order of the nested keys.
classification_df_final = pd.DataFrame(
    {
        column: classification_df_broken_in_columns[column].map(_classification_name)
        for column in columns_of_interest
    },
    index=classification_df_broken_in_columns.index,
)

# PROMOTER

In [126]:
# Break the promoters column into columns (one positional column per promoter)
promoters = iteration_1['promoters'].apply(pd.Series)

# Rename positional column N to 'event promoter N+1'
promoters = promoters.rename(
    columns={label: 'event promoter' + ' ' + str(label + 1) for label in promoters.columns}
)

# Expand every promoter dictionary into its name and description.
promoter_frames = []
for column in promoters.columns:
    series_holder = (
        promoters[column]
        .apply(pd.Series)
        # reindex returns NaN columns when no promoter in this slot has the
        # key, where a plain column selection would raise a KeyError
        .reindex(columns=['name', 'description'])
        .rename(columns={'name':column + ' ' + 'name', 'description':column + ' ' + 'description'})
    )
    promoter_frames.append(series_holder)

# Concatenate once; pd.concat raises on an empty list, so guard that case
promoters_df_final = pd.concat(promoter_frames, axis = 1) if promoter_frames else pd.DataFrame()

# PRICE 

In [127]:
# Take the first price range of every event.
# Events without price ranges hold NaN instead of a list; they get an empty
# dictionary so they become a row of NaN instead of raising a TypeError.
list_of_pricerange_dictionaries = [
    ranges[0] if isinstance(ranges, (list, tuple)) and len(ranges) > 0 else {}
    for ranges in iteration_1['priceRanges']
]

pricerange_df = pd.DataFrame({'pricerange_dictionaries':list_of_pricerange_dictionaries})

# Keep the type and the min/max price. reindex returns NaN columns when a key
# is absent from every event, where a plain column selection would raise.
pricerange_df_broken_in_columns = (
    pd.DataFrame(list_of_pricerange_dictionaries, index=pricerange_df.index)
    .reindex(columns=['type', 'min', 'max'])
    .rename(columns={'type':'price type', 'min':'price min', 'max':'price max'})
)

# TICKET LIMIT 

In [128]:
# Expand the ticketLimit dictionaries into columns and relabel 'info'
ticket_limit_expanded = iteration_1['ticketLimit'].apply(pd.Series)
ticket_limit = pd.DataFrame(ticket_limit_expanded)
ticket_limit = ticket_limit.rename(columns={'info': 'ticket limit'})

# Single-column dataframe holding the pleaseNote text of every event
please_note = iteration_1['pleaseNote'].to_frame()


# Merge

In [129]:
# Put all the pieces side by side: one row per event
new_dataset = pd.concat([
    event_names_df
    ,iteration_1['info']
    ,sales_public
    ,presale_final
    ,start_dates
    ,status
    ,initial_start_date
    ,classification_df_final
    ,promoters_df_final
    ,pricerange_df_broken_in_columns
    ,ticket_limit
    ,please_note

], axis = 1)

DATASET_PATH = 'final_ticketmaster_dataset.csv'

try:
    current_dataset = pd.read_csv(DATASET_PATH)
except (FileNotFoundError, pd.errors.EmptyDataError):
    # First run (or empty file): start the dataset from this pull.
    # Only these two cases may replace the file; any other error now surfaces
    # instead of silently overwriting the accumulated dataset.
    new_dataset.to_csv(DATASET_PATH, index=False)
    print('except run')
else:
    # Files written without index=False carry the old index as 'Unnamed: N'
    # columns, one more per read/write cycle; drop them.
    is_saved_index = current_dataset.columns.astype(str).str.startswith('Unnamed:')
    current_dataset = current_dataset.loc[:, ~is_saved_index]

    current_ids = list(current_dataset['event id'])
    new_ids = list(new_dataset['event id'])

    # Ids present in this pull but not yet stored
    extra_ids = list(np.setdiff1d(new_ids,current_ids))

    updated_dataset = pd.concat(
        [current_dataset, new_dataset[new_dataset['event id'].isin(extra_ids)]],
        axis = 0,
    )

    # index=False keeps the row index out of the file so it is not read back
    # as a data column on the next run
    updated_dataset.to_csv(DATASET_PATH, index=False)

    print ('This pull resulted in', len(extra_ids), 'new events')

This pull resulted in 0 new events
