## https://www.setlist.fm/search?query=scorpions

In [None]:
from robobrowser import RoboBrowser
import geocoder
import folium
import pandas as pd
import dateparser
from joblib import Memory
import difflib
import pycountry
from iso3166 import countries
# On-disk memoisation (stored under ./tmp) for the functions decorated with
# ``@memory.cache`` in this notebook.
# The directory is passed positionally because the keyword was renamed from
# ``cachedir`` to ``location`` in later joblib releases; the first positional
# parameter is the cache directory under both signatures.
memory = Memory('tmp', verbose=0)

In [None]:
# Load the setlist.fm search results for the band and keep the first concert
# preview so the following cells can explore its structure.
browser = RoboBrowser(history=True, parser='html5lib')
url = 'https://www.setlist.fm/search?query=scorpions'
browser.open(url)
concert = browser.select('.setlistPreview')[0]
concert

In [None]:
# Read the three date fragments and the headline link text from the preview.
day, month, year = (
    concert.select(selector)[0].text
    for selector in ('.day', '.month', '.year')
)
desc = concert.select('h2 a')[0].text
print(day, month, year, desc)

In [None]:
# The location is whatever follows the first " at " in the headline.
# ``str.find`` returns -1 when the separator is missing; in that case use the
# whole headline instead of silently slicing from an arbitrary offset.
idx = desc.find(' at ')
idx = idx + 4 if idx != -1 else 0
loc = desc[idx:]

# Keep only the last three comma-separated pieces when there are more.
loc_pieces = loc.split(',')
if len(loc_pieces) >= 3:
    loc = ','.join(loc_pieces[-3:])
print(loc)

In [None]:
# Geocode the trimmed location string and dump the GeoJSON view of the result.
g = geocoder.google(loc)
print(g.geojson)


In [None]:
# Country value reported in the properties of the first GeoJSON feature.
code = g.geojson['features'][0]['properties']['country']
print(code)

In [None]:
from iso3166 import countries

# Look the country up in iso3166 and switch to its three-letter (alpha-3) code.
code = countries.get(code).alpha3
print(code)

In [None]:
@memory.cache
def get_latlng(query):
    """Geocode *query* and return ``(lat, lng, code)``.

    ``code`` is the ``alpha3`` attribute of the iso3166 entry matching the
    country reported by the geocoder. Lookup is best-effort: when the
    geocoder result is falsy, or the response cannot be interpreted,
    ``(None, None, None)`` is returned instead of raising.

    The function is wrapped in ``memory.cache``, so whatever it returns
    (including the all-``None`` result) is what the cache stores for *query*.
    """
    try:
        g = geocoder.google(query)
        if not g:
            return None, None, None
        country = g.geojson['features'][0]['properties']['country']
        code = countries.get(country).alpha3
        # Unpack explicitly so a malformed latlng is handled here rather than
        # surfacing later as an unpacking error in the caller.
        lat, lng = g.latlng
        print(query, g.latlng, code)
    except Exception as err:
        # Deliberately broad (best-effort lookup), but unlike a bare
        # ``except:`` this lets KeyboardInterrupt/SystemExit propagate.
        print('geocoding failed for', query, '-', err)
        return None, None, None
    # Always a 3-tuple, matching the shape of the failure value.
    return lat, lng, code
    

@memory.cache
def get_data_paged(query, page):
    """Scrape one page of setlist.fm search results for *query*.

    Args:
        query: Search text sent to setlist.fm.
        page: Result page number to fetch.

    Returns:
        A list of rows ``[loc, lat, lng, code, date, desc]``, one per concert
        preview on the page whose location could be geocoded. Previews whose
        headline has no " at <location>" part, or whose location cannot be
        geocoded, are skipped.
    """
    from urllib.parse import urlencode

    # URL-encode the parameters so queries containing spaces, '&', braces,
    # etc. produce a valid request.
    params = urlencode([('page', page), ('query', query)])
    browser.open('https://www.setlist.fm/search?' + params)

    data = []
    for concert in browser.select('.setlistPreview'):
        month = concert.select('.month')[0].text
        day = concert.select('.day')[0].text
        year = concert.select('.year')[0].text
        date = dateparser.parse("{}, {} {}".format(year, month, day))

        desc = concert.select('h2 a')[0].text
        # The location is everything after the first " at " in the headline.
        _, sep, loc = desc.partition(' at ')
        if not sep or not loc.strip():
            # Nothing usable to geocode for this entry.
            continue

        # Keep only the last three comma-separated pieces when there are more.
        loc_pieces = loc.split(',')
        if len(loc_pieces) >= 3:
            loc = ','.join(loc_pieces[-3:])

        lat, lng, code = get_latlng(loc)
        # Compare against None so a coordinate of exactly 0.0 is not dropped.
        if lat is not None and lng is not None:
            data.append([loc, lat, lng, code, date, desc])
    return data
    
columns = ['loc', 'lat', 'lon', 'code', 'date', 'desc']

# Collect the rows of result pages 1-14 first and build the frame once.
# ``DataFrame.append`` was removed in pandas 2.0, and calling it in a loop
# copied the whole frame on every iteration.
# Note: the resulting index is a single 0..n-1 range instead of restarting
# at 0 for every page.
rows = []
for page in range(1, 15):
    rows.extend(get_data_paged('Scorpions', page))
concerts = pd.DataFrame(rows, columns=columns)

concerts.head(20)

In [None]:
# NOTE(review): newer plotly releases ship this module separately as
# ``chart_studio.plotly``; confirm against the installed plotly version.
import plotly.plotly as py

# Semi-transparent red line joining the concert coordinates in row order.
great_lines = [
    dict(
        type='scattergeo',
        lon=concerts['lon'],
        lat=concerts['lat'],
        mode='lines',
        line=dict(
            width=1,
            color='rgba(255,0,0,0.5)',
        ),
    )
]

# One marker per concert; hovering shows the location string.
venue_markers = [
    dict(
        type='scattergeo',
        lon=concerts['lon'],
        lat=concerts['lat'],
        # 'loc' is not a hoverinfo flag; 'text' shows the ``text`` values.
        hoverinfo='text',
        text=concerts['loc'],
        mode='markers',
        marker=dict(
            size=10,
            color='rgba(255,0,0,0.5)',
        ),
    )
]

layout = dict(
    title='recitales',
    width=1000,
    height=800,
    showlegend=False,
    geo=dict(
        # Map styling belongs under ``layout.geo``; at the top level of the
        # layout these keys are not geo options and have no effect on the map.
        showland=True,
        showcountries=True,
        showocean=True,
        countrywidth=0.5,
        landcolor='#fff',
        oceancolor='#eee',
        projection=dict(
            # Projection type names are lowercase.
            type='mercator',
        ),
    ),
)

fig = dict(data=great_lines + venue_markers, layout=layout)
py.iplot(fig, validate=False, filename='d3-globe')

In [None]:
# Per-country aggregation: count the non-null values of every column for each
# country code.
df = concerts.groupby(by='code').count()
df.head(20)

In [None]:
# Move the group key out of the index so 'code' is an ordinary column again.
df = df.reset_index(drop=False)
df.head(20)

In [None]:
# Choropleth layer: countries identified by df['code'], coloured by the
# values in df['loc'].
data = [
    dict(
        type='choropleth',
        locations=df['code'],
        z=df['loc'],
        autocolorscale=True,
        marker=dict(
            line=dict(
                color='rgb(180,180,180)',
                width=0.5,
            ),
        ),
        colorbar=dict(
            autotick=False,
            tickprefix='',
            title='concerts',
        ),
    )
]

# One marker per concert, labelled with its location string.
markers = [
    dict(
        type='scattergeo',
        lon=concerts['lon'],
        lat=concerts['lat'],
        hovertext=concerts['loc'],
        text=concerts['loc'],
        textposition='top center',
        mode='markers',
        # The former ``string=...`` entry was dropped: it is not a scattergeo
        # attribute and only duplicated ``text``.
        marker=dict(
            # Scalar (not a one-element list) so it applies to every marker.
            symbol='4',
            size=10,
            color='black',
            # Marker outlines are configured through ``marker.line``; there
            # is no ``marker.linecolor`` attribute.
            line=dict(color='white', width=1),
        ),
    )
]

layout = dict(
    width=800,
    height=800,
    showlegend=False,
    title='concerts scorpions',
    geo=dict(
        showframe=True,
        showcoastlines=True,
        projection=dict(
            type='orthographic',
            rotation=dict(
                lon=-100,
                lat=40,
                roll=0,
            ),
        ),
    ),
)

# ``great_lines`` is the line trace built for the previous figure.
fig = dict(data=data + markers + great_lines, layout=layout)
py.iplot(fig, validate=False, filename='d3-world-map')