In [26]:
from robobrowser import RoboBrowser
import geocoder
import folium
import pandas as pd
import dateparser
from joblib import Memory
import difflib
import pycountry
from iso3166 import countries

memory = Memory(cachedir='tmp', verbose=0)


@memory.cache
def get_latlng(query):
    try:
        g=geocoder.google(query)
        if g:
            country = g.geojson['features'][0]['properties']['country']
            code=countries.get(country).alpha3
            print(query, g.latlng, code)
            return g.latlng+[code]
        else:
            return None, None, None
    except:
        return None, None, None
    
browser = RoboBrowser(history=True, parser='html5lib')

@memory.cache
def get_data_paged(query, page):
    url = 'https://www.setlist.fm/search?page={}&query={}'.format(page, query)
    browser.open(url.format(page))
    data = []
    for concert in browser.select('.setlistPreview'):
        month = concert.select('.month')[0].text
        day = concert.select('.day')[0].text
        year = concert.select('.year')[0].text
        datetext = "{}, {} {}".format(year, month, day)
        date = dateparser.parse(datetext)
        desc = concert.select('h2 a')[0].text
        idx = desc.find(' at ')+4
        loc = desc[idx:]
        loc_pieces = loc.split(',')
        #print(loc_pieces[-1],country)
        if loc == None:
            continue
        if len(loc_pieces)>=3:
            loc = ','.join(loc_pieces[-3:])
        lat, lng, code = get_latlng(loc)
        if lat and lng:
            data.append([loc, lat, lng, code, date, desc])
    return data
    
columns = ['loc', 'lat', 'lon', 'code', 'date', 'desc']
concerts = pd.DataFrame(columns = columns)
for page in range(1,15):
    data = get_data_paged('Scorpions', page)
    df = pd.DataFrame(data, columns = columns)
    concerts = concerts.append(df)

concerts.head(20)

 Reno, NV, USA [39.5296329, -119.8138027] USA
 Tacoma, WA, USA [47.2528768, -122.4442906] USA
 Spokane, WA, USA [47.6587802, -117.4260465] USA
 West Valley, UT, USA [40.6916132, -112.0010501] USA
 Laval, QC, Canada [45.6066487, -73.712409] CAN
Münsterplatz, Ulm, Germany [48.3990075, 9.9917534] DEU
Festival de Nîmes 2017 [43.83490399999999, 4.3596146] FRA
Life Festival 2017 [44.2806066, -88.4540511] USA
Festhalle, Frankfurt, Germany [50.111863, 8.6508638] DEU
Arena Leipzig, Leipzig, Germany [51.3427769, 12.3547643] DEU
Lanxess Arena, Cologne, Germany [50.9383376, 6.9829511] DEU
Le Zénith de Dijon, Dijon, France [47.3554306, 5.0582189] FRA
Sepang International Circuit Helipad, Sepang, Malaysia [2.7639416, 101.7371401] MYS
Imperial Citadel of Thăng Long, Hanoi, Vietnam [21.0369234, 105.839892] VNM
Palais Theatre, Melbourne, Australia [-37.8673546, 144.976156] AUS
Hippodrome Henri Milliard, Noumea, New Caledonia [-22.3000238, 166.4508859] NCL
Grand Cube, Osaka, Japan [34.6894288, 135.48626

Unnamed: 0,loc,lat,lon,code,date,desc
0,"Phoenix, AZ, USA",33.448377,-112.074037,USA,2017-10-08,"Scorpions at Talking Stick Resort Arena, Phoen..."
1,"Inglewood, CA, USA",33.96168,-118.353131,USA,2017-10-07,"Scorpions at The Forum, Inglewood, CA, USA"
2,"Reno, NV, USA",39.529633,-119.813803,USA,2017-10-03,"Scorpions at Grand Sierra Theatre, Reno, NV, USA"
3,"Tacoma, WA, USA",47.252877,-122.444291,USA,2017-09-30,"Scorpions at Tacoma Dome, Tacoma, WA, USA"
4,"Spokane, WA, USA",47.65878,-117.426046,USA,2017-09-29,"Scorpions at Spokane Arena, Spokane, WA, USA"
5,"West Valley, UT, USA",40.691613,-112.00105,USA,2017-09-26,"Scorpions at USANA Amphitheatre, West Valley, ..."
0,"Rosemont, IL, USA",41.986751,-87.87216,USA,2017-09-23,"Scorpions at Allstate Arena, Rosemont, IL, USA"
1,"Toronto, ON, Canada",43.653226,-79.383184,CAN,2017-09-22,"Scorpions at Budweiser Stage, Toronto, ON, Canada"
2,"Laval, QC, Canada",45.606649,-73.712409,CAN,2017-09-19,"Scorpions at Place Bell, Laval, QC, Canada"
3,"New York, NY, USA",40.712775,-74.005973,USA,2017-09-16,"Scorpions at Madison Square Garden, New York, ..."


In [3]:
from robobrowser import RoboBrowser
import plotly.plotly as py
import pandas as pd
import networkx as nx
from cachier import cachier
import geocoder

great_lines = [ 
        dict(
            type = 'scattergeo',
            lon = concerts['lon'],
            lat = concerts['lat'],
            mode = 'lines',
            line = dict(
                    width = 1,
                    color = 'rgba(255,0,0,0.5)',
                    ),
        )
        ]
venue_markers = [ dict(
        type = 'scattergeo',
        lon = concerts['lon'],
        lat = concerts['lat'],
        hoverinfo = 'loc',
        text = concerts['loc'],
        mode = 'markers',
        marker = dict( 
            size=10, 
            color='rgba(255,0,0,0.5)',   
        ))]
    
layout = dict(
        title = 'recitales',
        width = 1000,
        height = 800,
            showlegend = False, 

            showland = True,
            showcountries = True,
            showocean = True,
            countrywidth = 0.5,
            landcolor = '#fff',
            oceancolor = '#eee',
    
        geo = dict(
            projection = dict( 
                type = 'Mercator',          
            ),
            
        )
    )
    
fig = dict( data=great_lines+venue_markers, layout=layout )
py.iplot( fig, validate=False, filename='d3-globe' )

In [27]:
df = concerts.groupby('code').count()
df=df.reset_index()
df.head(10)

Unnamed: 0,code,loc,lat,lon,date,desc
0,ARE,1,1,1,1,1
1,AUS,1,1,1,1,1
2,BEL,2,2,2,2,2
3,BGR,1,1,1,1,1
4,BRA,5,5,5,5,5
5,CAN,6,6,6,6,6
6,CHE,3,3,3,3,3
7,CHL,1,1,1,1,1
8,CHN,1,1,1,1,1
9,CZE,1,1,1,1,1


In [28]:
data = [ dict(
        type = 'choropleth',
        locations = df['code'],
        z = df['loc'],
        autocolorscale = True,
        marker = dict(
            line = dict (
                color = 'rgb(180,180,180)',
                width = 0.5
            ) ),
        colorbar = dict(
            autotick = False,
            tickprefix = '',
            title = 'concerts'),
      ) ]

markers = [ dict(
        type = 'scattergeo',
        lon = concerts['lon'],
        lat = concerts['lat'],
        hovertext  = concerts['loc'],
        text = concerts['loc'] ,
        textposition = 'top center',
        mode = 'markers+text',
        string = concerts['loc'],
        marker = dict( 
            symbol = ['4'],
            size=12, 
            color='black',  
            linecolor = 'white'
            
        ))]

layout = dict(
     width = 800,
    height = 800,
    title = 'concerts',
    geo = dict(
        showframe = True,
        showcoastlines = True,
        projection = dict( 
                type = 'orthographic',
                rotation = dict(
                    lon = -100,
                    lat = 40,
                    roll = 0
                )            
            ),
    )
)

fig = dict( data=data+markers, layout=layout )
py.iplot( fig, validate=False, filename='d3-world-map' )