# Backend Database (Part 1: Lastfm)

The foundation of our backend is an extensive list of artists and the information relevant to them. We will use five data sources (APIs and scraped websites) to collect this information:

1. **lastfm API to gather a long list of artists, mainly those popular in the US.**
2. SeatGeek API to gather data on upcoming concerts, particularly ticket pricing and concert size.
3. Scrape SeatGeek website for capacity information on concert venues.
4. Songkick API to retrieve data on historical concerts.
5. Scrape Billboard website for recent successful concerts. Includes information like revenue and attendance.


**1. Use lastfm API to get a list of artists**

In [178]:
import pandas as pd
import requests
from datetime import datetime
import MySQLdb as mdb
import sys

In [89]:
# Build the base request URL for the Last.fm chart.gettopartists method.
# The query string ends in "page=" so that a page number can be appended to it.
# NOTE(review): the API key is hard-coded in the query string -- consider
# loading it from configuration instead of committing it with the notebook.
api_root_url = "http://ws.audioscrobbler.com/2.0/"
url_params = "?method=chart.gettopartists&api_key=d31531b394866cb9db67526b455f274b&format=json&limit=1000&page="

url = api_root_url + url_params
# Exploratory request with no page number appended; the decoded JSON is kept in `data`.
data = requests.get(url).json()

In [165]:
def get_artist_lastfm(url, pages=10):
    """Collect artist records from a paginated Last.fm artist chart.

    Parameters
    ----------
    url : str
        Request URL ending in ``page=``; the 1-based page number is appended
        to it for every request.
    pages : int, optional
        Number of consecutive pages to request, starting at page 1.
        Defaults to 10, which is what this function always fetched before
        the parameter existed.

    Returns
    -------
    list of dict
        One dict per artist entry in the responses, with the keys ``name``,
        ``playcount`` (converted to ``int``) and ``image`` (the ``#text``
        value of the last element of the entry's ``image`` list).

    Raises
    ------
    KeyError
        If a response does not contain ``['artists']['artist']`` or an entry
        lacks one of the fields read below.
    """
    artist_list = []
    for page in range(1, pages + 1):
        payload = requests.get(url + str(page)).json()
        for entry in payload['artists']['artist']:
            artist_list.append({
                'name': entry['name'],
                'playcount': int(entry['playcount']),
                # the last image entry is the largest size
                'image': entry['image'][-1]['#text'],
            })
    return artist_list

In [166]:
# Fetch the artist records from Last.fm; `url` ends in "page=" and the
# function appends the page numbers itself.
artist_names = get_artist_lastfm(url)

In [187]:
# Report how many artist records were collected
print('Number of Artists: ', len(artist_names))

Number of Artists:  9030


## Creating Lastfm Database

In [179]:
# Open a connection to the local MySQL server.
# No default database is selected here; later queries qualify table names
# with the database name.
con = mdb.connect(
    host='localhost',
    user='root',
    passwd='<password>',
    charset='utf8',
    use_unicode=True,
)

In [181]:
# Build the statement that creates the database holding the Last.fm data
db_name = 'Project'
create_db_query = "CREATE DATABASE IF NOT EXISTS {db} DEFAULT CHARACTER SET 'utf8'".format(db=db_name)

# Create the database; the cursor is closed even if the statement fails
cursor = con.cursor()
try:
    cursor.execute(create_db_query)
finally:
    cursor.close()

In [190]:
# Create the table that stores one row per Last.fm artist.
# The artist name is the primary key, so each artist is stored at most once.
# NOTE(review): playcount is a signed INT -- confirm Last.fm play counts
# cannot exceed 2,147,483,647, otherwise switch the column to BIGINT.
table_name = 'lastfm'

create_table_query = '''CREATE TABLE IF NOT EXISTS {db}.{table} 
                                (artist varchar(250), 
                                playcount int,
                                image_url varchar(250),
                                PRIMARY KEY(artist)
                                )'''.format(db=db_name, table=table_name)

# Run the statement; the cursor is closed even if the statement fails
cursor = con.cursor()
try:
    cursor.execute(create_table_query)
finally:
    cursor.close()

In [192]:
# Insert the collected artist records into the lastfm table.
# INSERT IGNORE skips rows whose artist (the primary key) is already present.
table_name = 'lastfm'

query_template = '''INSERT IGNORE INTO {db}.{table}(artist, 
                                            playcount,
                                            image_url) 
                                            VALUES (%s, %s, %s)'''.format(db=db_name, table=table_name)

# One parameter tuple per artist, in the column order of the INSERT above
rows = [
    (record['name'], record['playcount'], record['image'])
    for record in artist_names
]

# A single cursor is used for the whole batch and is closed even if an
# insert fails; the commit only happens when every row was accepted.
cursor = con.cursor()
try:
    cursor.executemany(query_template, rows)
    con.commit()
finally:
    cursor.close()



**2. Use SeatGeek to collect data on upcoming concerts**

In [156]:
# Request the SeatGeek performer record for the slug "ed-sheeran".
# NOTE(review): despite its name, `artist` holds the full request URL here,
# not an artist name.
artist = "https://api.seatgeek.com/2/performers?slug=ed-sheeran&client_id=<client_id>"

# Keep the decoded JSON response in `seatgeek` for inspection
seatgeek = requests.get(artist).json()

In [21]:
# Get event data. Returns one flat list of dicts per artist, so it is easy to
# call in a loop over many artists later on.
def get_event_data_seatgeek(artist):
    """Fetch upcoming concert/event information for one artist from SeatGeek.

    Parameters
    ----------
    artist : str
        Artist name. It is lowercased and its spaces are replaced by
        hyphens to form the performer slug used in the request.

    Returns
    -------
    list of dict
        One dict per event in the response, with the keys ``artist`` (the
        slug), ``title``, ``concert_date`` (``datetime`` parsed from the
        first 10 characters of ``datetime_utc``), ``address``, ``venue``
        (venue name as a slug), ``lat``, ``lon``, ``average_price``,
        ``highest_price``, ``lowest_price`` and ``good_deals``.

    Raises
    ------
    KeyError
        If the response has no ``events`` entry or an event lacks one of
        the required fields read below.
    """
    artist = artist.lower().replace(" ", "-")
    url = "https://api.seatgeek.com/2/events?performers.slug=" + artist + "&client_id=<client_id>"
    events = requests.get(url).json()['events']
    events_list = []
    # Iterate over every event (the previous index loop dropped the last one)
    for event in events:
        # Read venue data from the current event, not from the first event
        venue = event['venue']
        location = venue['location']
        stats = event['stats']
        # Either address part may be missing or None; join only what exists
        address_parts = (venue.get('address'), venue.get('extended_address'))
        events_list.append({
            'artist': artist,
            'title': event['short_title'],
            # keep only the YYYY-MM-DD part of the UTC timestamp
            'concert_date': datetime.strptime(event['datetime_utc'][0:10], '%Y-%m-%d'),
            'address': ', '.join(part for part in address_parts if part),
            'venue': venue['name'].lower().replace(" ", "-"),
            'lat': location['lat'],
            # longitude gets its own key instead of overwriting 'lat'
            'lon': location['lon'],
            'average_price': stats['average_price'],
            'highest_price': stats['highest_price'],
            'lowest_price': stats['lowest_price'],
            'good_deals': stats['lowest_price_good_deals'],
        })
    return events_list

In [20]:
seatgeek

{'meta': {'geolocation': None,
  'page': 1,
  'per_page': 10,
  'took': 1,
  'total': 1},
 'performers': [{'colors': None,
   'divisions': None,
   'genres': [{'id': 452, 'name': 'Pop', 'primary': True, 'slug': 'pop'},
    {'id': 456, 'name': 'Rock', 'primary': False, 'slug': 'rock'},
    {'id': 467, 'name': 'Folk', 'primary': False, 'slug': 'folk'}],
   'has_upcoming_events': True,
   'home_venue_id': None,
   'id': 13546,
   'image': 'https://chairnerd.global.ssl.fastly.net/images/performers-landscape/ed-sheeran-a111f3/13546/huge.jpg',
   'image_attribution': None,
   'image_license': None,
   'images': {'huge': 'https://chairnerd.global.ssl.fastly.net/images/performers-landscape/ed-sheeran-a111f3/13546/huge.jpg'},
   'links': [],
   'name': 'Ed Sheeran',
   'num_upcoming_events': 19,
   'popularity': 0.0,
   'score': 0.7799999714,
   'short_name': 'Ed Sheeran',
   'slug': 'ed-sheeran',
   'stats': {'event_count': 19},
   'taxonomies': [{'id': 2000000, 'name': 'concert', 'parent_id':

## Creating Upcoming Concerts Database

In [None]:
# Open a connection to the local MySQL server.
# No default database is selected here; later queries qualify table names
# with the database name.
con = mdb.connect(
    host='localhost',
    user='root',
    passwd='<password>',
    charset='utf8',
    use_unicode=True,
)

In [None]:
# Build the statement that creates the database holding the SeatGeek data
db_name = 'SeatGeek'
create_db_query = "CREATE DATABASE IF NOT EXISTS {db} DEFAULT CHARACTER SET 'utf8'".format(db=db_name)

# Create the database; the cursor is closed even if the statement fails
cursor = con.cursor()
try:
    cursor.execute(create_db_query)
finally:
    cursor.close()

In [None]:
# Create the Core table inside the SeatGeek database.
# NOTE(review): the columns below (hotel_name, publish_date, price, labels,
# image_url) describe hotel listings and do not match the event fields
# returned by get_event_data_seatgeek -- this looks like a template carried
# over from another project; confirm the intended schema before running.
table_name = 'Core'

# The {db} and {table} are placeholders for the parameters in the format(....) statement
create_table_query = '''CREATE TABLE IF NOT EXISTS {db}.{table} 
                                (hotel_name varchar(250), 
                                publish_date datetime,
                                price int,
                                labels varchar(250),
                                image_url varchar(250),
                                PRIMARY KEY(hotel_name, publish_date)
                                )'''.format(db=db_name, table=table_name)

# Run the statement; the cursor is closed even if the statement fails
cursor = con.cursor()
try:
    cursor.execute(create_table_query)
finally:
    cursor.close()

In [None]:
# Scrape hotel name, price, labels and image url for every listing and
# insert one row per listing into the Core table.
# NOTE(review): url_list, listing_maker, get_name, get_price, get_labels,
# get_image and timezone are not defined or imported in the visible part of
# this notebook -- confirm where they come from before running this cell.
table_name = 'Core'

query_template = '''INSERT IGNORE INTO {db}.{table}(hotel_name, 
                                            publish_date,
                                            price,
                                            labels,
                                            image_url) 
                                            VALUES (%s, %s, %s, %s, %s)'''.format(db=db_name, table=table_name)

# A single cursor is used for all inserts and is closed even if one fails;
# the commit only happens when every row was accepted.
cursor = con.cursor()
try:
    for url in url_list:
        for item in listing_maker(url):
            hotel_name = get_name(item)
            # NOTE(review): the formatted value carries a zone name and UTC
            # offset ("%Z%z") although the target column is a plain
            # datetime -- confirm MySQL stores it as intended.
            publish_date = datetime.now(timezone('America/New_York')).strftime("%Y-%m-%d %H:%M:%S %Z%z")
            price = get_price(item)
            labels = get_labels(item)
            image_url = get_image(item)

            query_parameters = (hotel_name, publish_date, price, labels, image_url)
            cursor.execute(query_template, query_parameters)
    con.commit()
finally:
    cursor.close()