In [1]:
import requests
import pandas as pd

In [2]:
def get_cities():
    '''
    Fetches Shazam's chart-locations listing and flattens it to city level.

    Returns a list with one dictionary per city. Each dictionary is a copy of
    the city's own entry, extended with the 'country', 'country_id' and
    'country_listid' values taken from the country it is listed under.
    '''
    response = requests.get('https://www.shazam.com/static/chartlocations.json')
    payload = response.json()

    # dict(entry, **extras) copies the entry before adding the country fields,
    # so the decoded payload itself is left unmodified.
    return [
        dict(
            entry,
            country=nation['name'],
            country_id=nation['id'],
            country_listid=nation['listid']
        )
        for nation in payload['countries']
        for entry in nation['cities']
    ]

In [3]:
def get_shazams(trackid, limit=5, timeout=10):
    '''
    Returns the number of shazams for a track.

    The tag-count endpoint is queried up to `limit` times; the first response
    that decodes to JSON containing a 'total' field is used.

    Parameters:
        trackid: identifier of the track; converted with str() to build the URL.
        limit:   maximum number of attempts (default 5).
        timeout: seconds to wait on each HTTP request before giving up on it
                 (default 10).

    Returns the value of the 'total' field, or None when every attempt failed.
    '''
    url = 'https://www.shazam.com/shazam/v1/en-US/US/web/-/tagcounts/track/'+ str(trackid)

    for _ in range(limit):
        try:
            data = requests.get(url, timeout=timeout).json()
            return data['total']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # Best effort: network failures, undecodable bodies and payloads
            # without a usable 'total' are retried. Unlike a bare `except`,
            # KeyboardInterrupt and SystemExit still propagate.
            continue

    # Every attempt failed; the caller gets an explicit "unknown".
    return None
    

In [4]:
def get_track_details(trackid):
    '''
    Looks up a single track through the Shazam discovery endpoint.

    trackid is converted with str() and appended to the endpoint URL.
    Returns the decoded JSON body of the response.
    '''
    endpoint = 'https://www.shazam.com/discovery/v4/en-US/US/web/-/track/'
    response = requests.get(endpoint + str(trackid))
    return response.json()

In [5]:
def get_city_chart(city_entry):
    '''
    Downloads the chart of a single city and tags every entry with context.

    city_entry is a city dictionary; its 'listid' value selects the chart.
    Every chart entry is updated in place with a 1-based 'rank' and with a
    'city_'-prefixed copy of each field of city_entry.

    Returns the chart entries.
    '''
    base_url = 'https://www.shazam.com/shazam/v2/en-US/US/web/-/tracks/'
    response = requests.get(base_url + city_entry['listid'])
    entries = response.json()['chart']

    # The response carries no explicit rank, so the position of an entry in
    # the chart is taken as its rank (first entry = rank 1).
    for position, entry in enumerate(entries, start=1):
        entry['rank'] = position
        entry.update(
            ('city_' + field, value) for field, value in city_entry.items()
        )

    return entries

In [6]:
def get_all_charts(cities):
    '''
    Collects the chart entries of every city into one flat list.

    cities is an iterable of city dictionaries; each one is passed to
    get_city_chart. Returns a single list holding all chart entries, in the
    order in which the cities were given.
    '''
    return [entry for city in cities for entry in get_city_chart(city)]

In [7]:
def create_dataframe(charts_data):
    '''
    Bookkeeping of the returned data from the Shazam API.

    Flattens the chart entries into a data frame, drops the irrelevant columns
    (e.g. thumbnails, store information, etc), renames the remaining ones to
    be more understandable, puts them in a fixed order and adds the number of
    shazams of every track (one get_shazams call per distinct track).

    Parameters:
        charts_data: list of chart entries (dictionaries), e.g. the result of
                     get_all_charts.

    Returns a data frame with the columns country, country_id, city_name,
    city_id, chart_date, rank, song_title, artist, track_id, url and
    num_shazams. Empty input gives an empty data frame with those columns.
    '''
    columns = [
        'country', 'country_id', 'city_name', 'city_id', 'chart_date', 'rank',
        'song_title', 'artist', 'track_id', 'url'
    ]

    # Nothing to normalize: return the expected layout instead of failing on
    # the column selection further down.
    if len(charts_data) == 0:
        return pd.DataFrame(columns=columns + ['num_shazams'])

    # pandas.io.json.json_normalize is deprecated since pandas 1.0 in favour
    # of the top-level pandas.json_normalize; the old location is only used
    # as a fallback on pandas versions that do not have the new one.
    normalize = getattr(pd, 'json_normalize', None)
    if normalize is None:
        normalize = pd.io.json.json_normalize

    cdf = normalize(charts_data)
    cdf['chart_date'] = pd.Timestamp("today").strftime("%m/%d/%Y")

    to_drop = [
        'alias', 'type', 'actions', 'artists', 'share.avatar',
        'city_country_listid', 'city_countryid', 'city_listid', 'share.href',
        'share.html', 'share.image', 'share.subject', 'share.text',
        'share.twitter', 'stores.apple.actions', 'stores.apple.coverarturl',
        'stores.apple.explicit', 'stores.apple.previewurl',
        'stores.apple.productid', 'stores.apple.trackid',
        'stores.claromusicasearch.actions', 'stores.google.actions',
        'stores.google.previewurl', 'stores.google.productid',
        'stores.google.trackid', 'stores.itunes.actions',
        'stores.itunes.coverarturl', 'stores.itunes.explicit',
        'stores.itunes.previewurl', 'stores.itunes.productid',
        'stores.itunes.trackid', 'images.blurred', 'images.default',
        'images.play', 'urlparams.{trackartist}', 'urlparams.{tracktitle}'
    ]
    # Not every listed column is guaranteed to be present in the normalized
    # data; errors='ignore' keeps a missing one from raising KeyError.
    cdf = cdf.drop(to_drop, axis='columns', errors='ignore')

    to_rename = {
        "city_country": "country",
        "city_country_id": "country_id",
        "heading.subtitle": "artist",
        "heading.title": "song_title",
        'key' : 'track_id'
    }
    cdf = cdf.rename(columns=to_rename)

    # Explicit copy so that adding a column below works on an independent
    # frame (avoids pandas' SettingWithCopyWarning).
    cdf = cdf[columns].copy()

    # Query for each song/key and get the number of shazams.
    # Each distinct track is queried once, in sorted order.
    shazams = {
        key: get_shazams(key) for key in sorted(set(cdf['track_id'].values))
    }

    cdf['num_shazams'] = cdf['track_id'].apply(lambda x : shazams[x])

    return cdf

In [8]:
%%time
if __name__ == "__main__":

    # Date stamp (YYYY_MM_DD) that makes the output file names unique per day.
    today = pd.Timestamp("today").strftime("%Y_%m_%d")

    # Only cities whose country_id is 'US' are charted here.
    cities = get_cities()
    us_cities = [c for c in cities if c['country_id'] == 'US']
    us_charts = get_all_charts(us_cities)
    df = create_dataframe(us_charts)

    filename = 'shazam_us_charts_' + today
    df.to_csv(filename + '.csv', index=False)
    # Written as .xlsx: the legacy .xls format needs the xlwt writer, which
    # current pandas releases no longer support.
    df.to_excel(filename + '.xlsx', index=False)

CPU times: user 53.2 s, sys: 2.07 s, total: 55.3 s
Wall time: 6min 8s


In [None]:
# TODO: Add the US top-100 chart to the existing data frame.
# Link to US chart: https://www.shazam.com/charts/top-100/united-states