# Leaflet web map of Last.fm artists

To see the final product live, check out my article ["Analyzing Last.fm Listening History"](http://geoffboeing.com/2016/05/analyzing-lastfm-history/)

Convert the geocoded CSV file of artists produced by [musicbrainz_geocoder.ipynb](musicbrainz_geocoder.ipynb) to a GeoJSON file for Leaflet web mapping.

In [1]:
import html
import json
import math
import random

import pandas as pd
from IPython.display import IFrame

In [2]:
# read the geocoded artists csv, then keep only the rows that have a lat-long
df = pd.read_csv('data/mb_geocoded.csv', encoding='utf-8')
print('{:,} total rows'.format(len(df)))

# boolean mask: True wherever place_latlng is not null
has_lat_lng = df['place_latlng'].notnull()
df = df[has_lat_lng]
print('{:,} rows with lat-long'.format(len(df)))

# nulls were just filtered out, so nunique() counts every remaining distinct value
print('{:,} unique lat-longs'.format(df['place_latlng'].nunique()))

19,358 total rows
14,556 rows with lat-long
2,534 unique lat-longs


In [3]:
# count how many rows share each place, then split the "lat,lng" string into two columns
place_counts = df['place_full'].value_counts()


def count_for_place(place):
    # look up the number of rows whose place_full equals this place
    return place_counts[place]


df['place_count'] = df['place_full'].map(count_for_place)

# split each "lat,lng" string once; the pieces are kept as strings
lat_lng_parts = df['place_latlng'].map(lambda lat_lng: lat_lng.split(','))
df['lat'] = lat_lng_parts.map(lambda parts: parts[0])
df['lng'] = lat_lng_parts.map(lambda parts: parts[1])

# keep only the columns used from here on
keep_columns = ['name', 'place_full', 'place_count', 'lat', 'lng']
df = df[keep_columns]
df.head()

Unnamed: 0,name,place_full,place_count,lat,lng
0,José González,"Gothenburg, Västra Götaland, Sweden",12,57.7072326,11.9670171
1,Kings of Convenience,"Bergen, Hordaland, Norway",16,60.3943457,5.3258853
3,The Von Bondies,"Detroit, Wayne County, Michigan, United States",71,42.3486635,-83.0567374
4,Blood Red Shoes,"Brighton, Brighton and Hove, England, United K...",14,50.8220399,-0.137406
5,The Horrors,"Cedar Rapids, Linn County, Iowa, United States",1,41.9758872,-91.6704052


In [4]:
# create html list of artists from each place

# how many artists to show before saying "...and n more"
num_to_show = 3
line_break = '<br />'

features = []

# group the rows by place once, rather than re-filtering the whole dataframe for
# every place; sort=False keeps places in order of first appearance
for place_full, names in df.groupby('place_full', sort=False)['name']:

    place_count = len(names)

    # artist names come from external data and end up inside html markup,
    # so escape &, < and > before embedding them
    lines = [html.escape('{}'.format(name), quote=False)
             for name in names.iloc[:num_to_show]]

    if place_count > num_to_show:
        lines.append('...and {:,} more'.format(place_count - num_to_show))

    # joining puts a line break only between entries, never after the last one
    features.append([place_full, line_break.join(lines)])

df_leaflet = pd.DataFrame(features, columns=['place_full', 'artists'])

In [5]:
# remove a trailing (and hence unnecessary) line break from the end of each artists list
# str.strip is no good here: it strips a set of characters rather than a suffix,
# so artist names would lose trailing b's and r's
def drop_trailing_line_break(text):
    if not text.endswith(line_break):
        return text
    return text[:-len(line_break)]


df_leaflet['artists'] = df_leaflet['artists'].map(drop_trailing_line_break)

In [6]:
# randomly shift a single coordinate value (a lat or a lng) by up to `kms` kilometers
def jitter(val, kms=0.5):
    """
    Return `val` moved by a uniformly random offset of at most `kms` kilometers.

    The kilometer distance is converted to degrees using the length of one
    degree along a circle of radius 6378.16 km, so `val` is expected to be in
    decimal degrees. The same conversion is used whether `val` is a latitude
    or a longitude.

    val : coordinate value to jitter
    kms : maximum offset, in kilometers, in either direction
    """
    earth_radius = 6378.16
    kms_per_degree = (2 * math.pi * earth_radius) / 360
    degrees_per_km = 1 / kms_per_degree
    low = val - (kms * degrees_per_km)
    high = val + (kms * degrees_per_km)
    return random.uniform(low, high)

In [7]:
# look up each place's lat and lng (first occurrence wins) and attach them to df_leaflet
df_unique = df[['place_full', 'lat', 'lng']].drop_duplicates(subset='place_full')
place_lat_lng = {
    place: (lat, lng)
    for place, lat, lng in zip(df_unique['place_full'], df_unique['lat'], df_unique['lng'])
}


def jittered(coordinate):
    # convert to float, jitter, then format with 7 decimal places
    return '{:.7f}'.format(jitter(float(coordinate)))


# all latitudes are jittered first, then all longitudes
df_leaflet['lat'] = df_leaflet['place_full'].map(lambda place: jittered(place_lat_lng[place][0]))
df_leaflet['lng'] = df_leaflet['place_full'].map(lambda place: jittered(place_lat_lng[place][1]))
df_leaflet.head()

Unnamed: 0,place_full,artists,lat,lng
0,"Gothenburg, Västra Götaland, Sweden",José González<br />Björn Ulvaeus<br />Little D...,57.7030539,11.962678
1,"Bergen, Hordaland, Norway",Kings of Convenience<br />Gisle Torvik<br />Ma...,60.3898842,5.3248914
2,"Detroit, Wayne County, Michigan, United States",The Von Bondies<br />MC5<br />Sufjan Stevens<b...,42.3510587,-83.0564847
3,"Brighton, Brighton and Hove, England, United K...",Blood Red Shoes<br />British Sea Power<br />Fu...,50.8227275,-0.1338711
4,"Cedar Rapids, Linn County, Iowa, United States",The Horrors,41.9746907,-91.6746103


## Write to GeoJSON for leaflet mapping

In [8]:
# function to write the dataframe out to geojson
def df_to_geojson(df, properties, lat='latitude', lon='longitude'):
    """
    Convert a dataframe to a GeoJSON FeatureCollection dict of Point features.

    Each row becomes one feature. Coordinates are converted to float so that
    they are written out as JSON numbers even when the columns hold numeric
    strings.

    df : dataframe with one row per point
    properties : iterable of column names to copy into each feature's properties
    lat : name of the column holding latitude
    lon : name of the column holding longitude

    Returns a dict with keys 'type' and 'features'.
    Raises ValueError or TypeError if a coordinate cannot be converted to float.
    """
    features = []

    # loop through each row in the dataframe and convert it to a geojson feature
    for _, row in df.iterrows():
        features.append({
            'type': 'Feature',
            # each requested column becomes a feature property
            'properties': {prop: row[prop] for prop in properties},
            'geometry': {
                'type': 'Point',
                # geojson positions are ordered [longitude, latitude] and must be numbers
                'coordinates': [float(row[lon]), float(row[lat])],
            },
        })

    return {'type': 'FeatureCollection', 'features': features}

In [9]:
# convert the places dataframe to geojson, using every column as a feature property
geojson = df_to_geojson(df_leaflet, df_leaflet.columns, lat='lat', lon='lng')

# write the result out as a javascript assignment to a variable named dataset
output_filename = 'leaflet/lastfm-dataset.js'
with open(output_filename, 'w') as output_file:
    # compact separators leave no whitespace between json tokens
    compact_json = json.dumps(geojson, separators=(',', ':'))
    output_file.write('var dataset={};'.format(compact_json))

# report how many features were saved to the file
feature_count = len(geojson['features'])
print('{:,} geotagged features saved to file'.format(feature_count))

2,575 geotagged features saved to file


In [10]:
# embed the leaflet web map page in this notebook as a 600x400 iframe
IFrame(src='leaflet/lastfm-artists-map.html', width=600, height=400)

To see the final product live, visit: http://geoffboeing.com/2016/05/analyzing-lastfm-history/