# Data Mining Dylan

*Which places does Dylan sing about the most?*

- For named entity recognition code, see `find_cities.ipynb`
- For web scraping code, see `dylan_scraper.py`

In [1]:
import pandas as pd
import folium
import altair
from analysis import tidy_table

In [2]:
# Load city data from file
city_df = pd.read_json('data/city_counts.json')
songs_df = pd.read_json('data/songs.json')

In [3]:
# Make tidy versions
city_tidy = tidy_table(city_df, ['songs'])
songs_tidy = tidy_table(songs_df, ['albums'])
df = pd.merge(city_tidy, songs_tidy, left_on='song', right_on='title')

## A bar chart

In [4]:
altair.Chart(city_df.query('cnt > 1')).mark_bar().encode(
    x='city', y='cnt')

## A Map

In [5]:
import xarray as xr
import numpy as np
import pandas as pd
import holoviews as hv
import geoviews as gv
import geoviews.feature as gf

import cartopy
from cartopy import crs as ccrs

from bokeh.tile_providers import STAMEN_TONER
from bokeh.models import WMTSTileSource

hv.notebook_extension('bokeh')

ModuleNotFoundError: No module named 'geoviews'

In [None]:
tiles = {'OpenMap': WMTSTileSource(url='http://c.tile.openstreetmap.org/{Z}/{X}/{Y}.png'),
         'ESRI': WMTSTileSource(url='https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{Z}/{Y}/{X}.jpg'),
         'Wikipedia': WMTSTileSource(url='https://maps.wikimedia.org/osm-intl/{Z}/{X}/{Y}@2x.png'),
         'Stamen Toner': STAMEN_TONER}

In [16]:
cities = pd.read_csv('./assets/cities.csv', sep='\t')
cities.set_index(' ')

Unnamed: 0,City,Country,Latitude,Longitude,Year,Population
,,,,,,
0.0,Sofia,Bulgaria,42.7,23.33,1950.0,520000.0
1.0,Mandalay,Myanmar,21.97,96.08,1950.0,170000.0
2.0,Nay Pyi Taw,Myanmar,19.75,96.1,1950.0,0.0
3.0,Yangon,Myanmar,16.87,96.12,1950.0,1300000.0
4.0,Minsk,Belarus,53.89,27.57,1950.0,280000.0


In [6]:
import pandas as pd
import ipywidgets as widgets
from viz_tools import get_place_selector, make_map

In [7]:
# Load city data from file
city_df = pd.read_json('data/city_counts.json')
songs_df = pd.read_json('data/songs.json')

In [8]:
view = get_place_selector(city_df, songs_df)

In [9]:
view

In [13]:
class LabelAndEntry(widgets.HBox):
    def __init__(self, *args, **kwargs):
        label = widgets.Label('my-label')
        entry = widgets.Text('data')
        super(LabelAndEntry, self).__init__()
        self.children = [label, entry]

In [36]:
class Menu(widgets.VBox):
    def __init__(self, city_df, songs_df, *args, **kwargs):
        
        city_df = city_df.set_index('city')
        songs_df = songs_df.set_index('title')
        
        self.sel = widgets.Dropdown(options=sorted(list(city_df.index.unique())))
        self.text = widgets.HTML(disabled=False)
        self.song_selector = widgets.Select(options=city_df.loc[self.sel.value, 'songs'])
        self.lyrics_viewer = widgets.HTML(placeholder='Lyrics...')
        
        super(Menu, self).__init__()
        
        self.children = [
            widgets.HBox([self.sel, self.text]),
            widgets.HBox([self.song_selector, self.lyrics_viewer])]
        
    def handle_city_change(self, change):
        count = city_df.loc[change.new, 'cnt']
        songs = city_df.loc[change.new, 'songs']
        text.value = '<b>Count:</b> {}</br>'.format(count)
        self.song_selector.options = songs
        self.song_selector.value = song_selector.options[0]

    def handle_song_change(self, change):
        city = self.sel.value
        lyrics = songs_df.loc[change.new, 'lyrics']
        lyrics = lyrics.replace('\n', '\\')
        idx = lyrics.find(city)
        lyric_text = '...' + lyrics[max(idx-50, 0):idx+100] + '...'
        lyric_text = lyric_text.replace(city, f'<b>{city}</b>')
        lyric_text = lyric_text.replace(city, f'<span style="color:blue">{city}</span>')
        self.lyrics_viewer.value = '<b>Lyrics:</b> {}</br>'.format(lyric_text)

    self.sel.observe(handle_city_change, names='value')
    self.song_selector.observe(handle_song_change, names='value')

NameError: name 'self' is not defined

In [35]:
# Load city data from file
city_df = pd.read_json('data/city_counts.json')
songs_df = pd.read_json('data/songs.json')

Menu(city_df, songs_df)

In [5]:
song_sel = view.children[1].children[0]

In [7]:
song_sel

In [23]:
from ipywidgets.embed import embed_data
import json

In [36]:
widget = get_place_selector(city_df, songs_df)

In [37]:
data = embed_data(widget)

In [38]:
manager_state = json.dumps(data['manager_state'])
widget_views = [json.dumps(view) for view in data['view_specs']]

In [39]:
widget_views

['{"version_major": 2, "version_minor": 0, "model_id": "30707b86a648426d9c3b908ae99b99b7"}']

In [50]:
widget.children[0].children[0].value = 'Boston'

In [53]:
city_df = city_df.set_index('city')
songs_df = songs_df.set_index('title')

In [57]:
caption = widgets.Label(value='The values of range1 and range2 are synchronized')
range1, range2 = widgets.IntSlider(description='Range 1'),\
                 widgets.IntSlider(description='Range 2')
l = widgets.jslink((range1, 'value'), (range2, 'value'))
display(caption, range1, range2)

In [81]:
# Load city data from file
city_df = pd.read_json('data/city_counts.json')
songs_df = pd.read_json('data/songs.json')

# Reset index
city_df = city_df.set_index('city')
songs_df = songs_df.set_index('title')

In [82]:
sel = widgets.Dropdown(options=sorted(list(city_df.index.unique())))

In [76]:
song_selector = widgets.Select(options=city_df.loc[sel.value, 'songs'])
song_selector

In [79]:
sel.value

'Brownsville'

In [75]:
song_selector.options

('Brownsville Girl',)

In [51]:
import ipywidgets as widgets

def get_place_selector(city_df, songs_df):
    """Return custom Jupyter widget"""

    # Reset index
    city_df = city_df.set_index('city')
    songs_df = songs_df.set_index('title')

    # Define some widgets
    sel = widgets.Dropdown(options=sorted(list(city_df.index.unique())))
    text = widgets.HTML(disabled=False)
    song_selector = widgets.Select(options=city_df.loc[sel.value, 'songs'])
    lyrics_viewer = widgets.HTML(placeholder='Lyrics...')

    # Piece together
    view = widgets.VBox([
        widgets.HBox([sel, text]),
        widgets.HBox([song_selector, lyrics_viewer])])

    def handle_city_change(change):
        count = city_df.loc[change.new, 'cnt']
        songs = city_df.loc[change.new, 'songs']
        text.value = '<b>Count:</b> {}</br>'.format(count)
        song_selector.options = songs
        song_selector.value = song_selector.options[0]

    def handle_song_change(change):
        city = sel.value
        lyrics = songs_df.loc[change.new, 'lyrics']
        lyrics = lyrics.replace('\n', '\\')
        idx = lyrics.find(city)
        lyric_text = '...' + lyrics[max(idx-50, 0):idx+100] + '...'
        lyric_text = lyric_text.replace(city, f'<b>{city}</b>')
        lyric_text = lyric_text.replace(city, f'<span style="color:blue">{city}</span>')
        lyrics_viewer.value = '<b>Lyrics:</b> {}</br>'.format(lyric_text)

    sel.observe(handle_city_change, names='value')
    song_selector.observe(handle_song_change, names='value')

    return view


In [30]:
view

In [9]:
view

### Folium

In [10]:
from folium import plugins

In [11]:
folium_map = folium.Map(
    location=(city_df.lat.median(), city_df.lng.median()),
    zoom_start=2)

popup_text = ("{}<br>"
              "Count: {}<br>")

for idx, row in city_df.iterrows():
    folium.CircleMarker(
        location=(row.lat, row.lng),
        radius=row.cnt,
        color="green",
        fill=True,
        popup=folium.Popup(
            html=popup_text.format(row.city, row.cnt),
            max_width=1000) 
    ).add_to(folium_map)

folium_map

### Ipyleaflet

In [12]:
from ipyleaflet import (
    Map,
    Marker, MarkerCluster,
    TileLayer, ImageOverlay,
    Polyline, Polygon, Rectangle, Circle, CircleMarker,
    GeoJSON,
    DrawControl
)

In [13]:
center = [34.6252978589571, -77.34580993652344]
zoom = 10

In [14]:
m = Map(default_tiles=TileLayer(opacity=1.0), center=center, zoom=zoom)

cm = CircleMarker(
    location=m.center, radius=30, weight=2,
    color='#F00', opacity=1.0, fill_opacity=1.0,
    fill_color='#0F0')

def handle_marker(change):
    print(change)

cm.observe(handle_marker)

In [15]:
m.add_layer(cm)

{'name': 'visible', 'old': False, 'new': True, 'owner': CircleMarker(color='#F00', fill_color='#0F0', fill_opacity=1.0, location=[34.6252978589571, -77.34580993652344], opacity=1.0, options=['class_name', 'clickable', 'color', 'dash_array', 'fill', 'fill_color', 'fill_opacity', 'line_cap', 'line_join', 'pointer_events', 'stroke', 'weight'], radius=30, weight=2), 'type': 'change'}


In [16]:
m

In [17]:
m.layers

(TileLayer(opacity=1.0, options=['attribution', 'detect_retina', 'max_zoom', 'min_zoom', 'tile_size']),
 CircleMarker(color='#F00', fill_color='#0F0', fill_opacity=1.0, location=[34.6252978589571, -77.34580993652344], opacity=1.0, options=['class_name', 'clickable', 'color', 'dash_array', 'fill', 'fill_color', 'fill_opacity', 'line_cap', 'line_join', 'pointer_events', 'stroke', 'weight'], radius=30, weight=2))