# Wikipedia Artist Detail Download

In [1]:
import os
import re
from urllib.parse import urljoin
import datetime as dt

import requests
from bs4 import BeautifulSoup
import sqlite3
import pandas as pd
import numpy as np
import scipy
import scipy.sparse

import matplotlib.pyplot as plt
from sklearn.cluster import MiniBatchKMeans

from IPython.display import display, HTML, clear_output

In [2]:
# Execute the helper notebooks in this kernel before anything else runs.
# NOTE(review): URLContentCacheSqlite and WikipediaAlbumYearListsDB (used below) are not
# defined in this notebook, so they presumably come from these notebooks — confirm.
%run ./url_cache.ipynb
%run ./wikipedia_access.ipynb
%run ./wikipedia_album_detail_lib.ipynb

## Configure DB and Load Album Extract

In [3]:
# Open the project's SQLite database. The same connection is handed to the URL cache,
# the album-list accessor and pd.read_sql further down.
# sqlite3.connect() is the documented factory for obtaining a Connection.
db = sqlite3.connect("var/data/music.db")

In [4]:
# Cache object built on the shared SQLite connection; `url_cache.get(url)` is what the
# cells below call to fetch artist pages.
url_cache = URLContentCacheSqlite(db)

# Load the full album extract. Later cells read its `Links` and `Artist` columns.
# NOTE(review): get_all() presumably returns a pandas DataFrame — confirm in the helper notebook.
wayl_db = WikipediaAlbumYearListsDB(db)
albums = wayl_db.get_all()

### Get Artist Links

In [5]:
def get_artist_link(links):
    """Return the href of the first 'Artist' link for an album, or None.

    Parameters
    ----------
    links : mapping or None
        Link data for one album row. It is read as ``links['Artist']``, a
        sequence of entries that each expose an ``'href'`` key.

    Returns
    -------
    The href of the first artist entry, or None when `links` is None, has no
    'Artist' key, has an empty 'Artist' sequence, or the first entry carries
    no 'href'.
    """
    if links is None or 'Artist' not in links:
        return None

    artist_links = links['Artist']
    # An empty list would make [0] raise IndexError and abort the whole .apply().
    if not artist_links:
        return None

    # .get() so an entry without an href yields None instead of a KeyError.
    return artist_links[0].get('href')
    
# Adds an 'ArtistLink' column to `albums` in place: get_artist_link applied to each row's Links value.
albums['ArtistLink'] = albums.Links.apply(get_artist_link)

In [6]:
# One row per distinct artist name that has a link, reduced to the name and link
# columns and ordered by artist name.
artists = (
    albums.loc[albums['ArtistLink'].notnull()]
    .drop_duplicates(subset='Artist')
    .loc[:, ['Artist', 'ArtistLink']]
    .copy()
    .sort_values(by='Artist')
)

In [7]:
# See what was downloaded already: every URL currently present in the url_cache table.
downloaded_urls = set(pd.read_sql("""
    select url
    from url_cache
""", db).url)

# Flag the artists whose link is already in the cache table.
artists['PrevDownloaded'] = artists.ArtistLink.isin(downloaded_urls)

# Count once; the value is both the summary figure and the starting point of the
# running counter shown in the progress lines.
d_count = int(artists['PrevDownloaded'].sum())
print(f"Previously downloaded: {d_count}")

# Fetch every artist page that is not flagged as downloaded yet.
for artist_row in artists[~artists.PrevDownloaded].itertuples(index=False):
    print(f"[{d_count + 1}] Getting: {artist_row.Artist}")
    url_cache.get(artist_row.ArtistLink)
    d_count += 1
    # Keep the progress log short: wipe the cell output after every 25 pages.
    # wait=True defers the wipe until the next line is printed, avoiding flicker.
    if (d_count % 25) == 0:
        clear_output(wait=True)

[11126] Getting: t.A.T.u.
[11127] Getting: tobyMac
[11128] Getting: tyDi
[11129] Getting: van Canto
[11130] Getting: will.i.am
[11131] Getting: yourcodenameis:milo
[11132] Getting: Álex Anwandter
[11133] Getting: Ásgeir
[11134] Getting: Ævangelist
[11135] Getting: Éric Lapointe
[11136] Getting: Ólafur Arnalds
[11137] Getting: Øystein Sevåg
[11138] Getting: Ünloco
[11139] Getting: …And You Will Know Us by the Trail of Dead
[11140] Getting: ≠Me


## Check an Artist Page

In [11]:
# Pick out the row(s) whose artist name contains "Run the Jewels" and show them.
artist = artists.loc[artists['Artist'].str.contains("Run the Jewels")]
artist

Unnamed: 0_level_0,Artist,ArtistLink,PrevDownloaded
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2389,Run the Jewels,https://en.wikipedia.org/wiki/Run_the_Jewels,False


In [12]:
# Fetch the page for the first matching artist through the URL cache and render it inline.
# NOTE(review): url_cache.get() is indexed with 'content' here; assumed to hold the page HTML — confirm.
a_html = url_cache.get(artist.iloc[0].ArtistLink)['content']
display(HTML(a_html))

Run the Jewels,Run the Jewels.1
Killer Mike (left) and El-P (right) in March 2014,Killer Mike (left) and El-P (right) in March 2014
Background information,Background information
Also known as,RTJ
Origin,United States
Genres,Hip hop political hip hop
Years active,2013–present
Labels,Jewel Runners BMG Mass Appeal Fool's Gold Big Dada Definitive Jux
Associated acts,Boots Zack de la Rocha DJ Shadow Gangsta Boo Little Shalimar Trackstar the DJ Wilder Zoby
Website,runthejewels.com
,

Year,Nominee / work,Award,Result,Ref.
2018,"""Chase Me"" (Danger Mouse Featuring Run The Jewels & Big Boi)",Best Rap Song,Nominated,[52]

Year,Nominee / work,Award,Result,Ref.
2014,A Christmas F*cking Miracle,Best Urban Video – Budget,Nominated,[53]
2015,Close Your Eyes (And Count to Fuck) (feat Zack de la Rocha),Best Urban Video-International,Nominated,[53]
2015,"Lie, Cheat, Steal",Best Urban Video – Budget,Nominated,[53]
2015,Early,Best Lyric Video,Nominated,[53]
2016,Love Again (with Gangsta Boo),Best Urban Video-International,Won,[53]
2016,Love Again (with Gangsta Boo),Best Editing in a Video,Won,[53]

Year,Nominee / work,Award,Result,Ref.
2016,Run The Jewels,Best International Band,Won,[54]

Year,Nominee / work,Award,Result,Ref.
2015,Meow the Jewels / Tag the Jewels,Marketing Genius Award,Won,[55]
2015,Run The Jewels,Hardest Working Artist of the Year,Nominated,[55]
2015,Run the Jewels 2,Groundbreaking Album of the Year,Won,[55]
2015,Run the Jewels 2,Album of the Year,Nominated,[55]
2016,Run The Jewels,Best Live Act,Nominated,[55]
2016,Meow The Jewels,Creative Packaging,Nominated,[55]
2016,#12DaysofRTJ,Marketing Genius Award,Won,[55]
2016,Close Your Eyes (and Count to F*ck),Video of the Year,Won,[55]
2017,Run the Jewels 3,Album of the Year,Nominated,[55]
2017,Run the Jewels 3,Best Rap/Hip-Hop Album,Won,[55]

vteRun the Jewels,vteRun the Jewels.1
El-P (discography) Killer Mike (discography),El-P (discography) Killer Mike (discography)
Studio albums,Run the Jewels (2013) Run the Jewels 2 (2014) Run the Jewels 3 (2016) RTJ4 (2020)
Remix album,Meow the Jewels (2015)
Singles,"""Close Your Eyes (And Count to Fuck)"" ""Nobody Speak"" ""Legend Has It"""
Concert tours,Run the World Tour
Related articles,"Discography ""Supercut"" (El-P remix)"

vteEl-P,vteEl-P.1
Discography Production discography,Discography Production discography
Studio albums,Fantastic Damage I'll Sleep When You're Dead Cancer 4 Cure
Instrumental albums,El-P Presents Cannibal Oxtrumentals FanDam Plus High Water Collecting the Kid
EPs,Shards of Pol-Pottery
Scores,Bomb the System Capone
Related articles,Company Flow Definitive Jux The Weathermen Run the Jewels

vteKiller Mike,vteKiller Mike.1
Discography,"Studio albums Monster I Pledge Allegiance to the Grind I Pledge Allegiance to the Grind II PL3DGE R.A.P. Music Compilations Underground Atlanta Singles ""A.D.I.D.A.S."" Featured singles ""The Whole World"" ""Land of a Million Drums"" ""Never Scared"""
Studio albums,Monster I Pledge Allegiance to the Grind I Pledge Allegiance to the Grind II PL3DGE R.A.P. Music
Compilations,Underground Atlanta
Singles,"""A.D.I.D.A.S."""
Featured singles,"""The Whole World"" ""Land of a Million Drums"" ""Never Scared"""
Related articles,Dungeon Family Purple Ribbon All-Stars Run the Jewels Trigger Warning with Killer Mike

0,1
Studio albums,Monster I Pledge Allegiance to the Grind I Pledge Allegiance to the Grind II PL3DGE R.A.P. Music
Compilations,Underground Atlanta
Singles,"""A.D.I.D.A.S."""
Featured singles,"""The Whole World"" ""Land of a Million Drums"" ""Never Scared"""

vteConsequence Artist / Band of the Year,vteConsequence Artist / Band of the Year.1
Artist,2010 Kanye West 2011 James Blake 2012 Frank Ocean 2013 Kanye West 2014 Run the Jewels 2015 Kendrick Lamar 2016 Chance the Rapper 2017 Lorde 2018 Janelle Monáe 2019 Billie Eilish 2020 Phoebe Bridgers
Band,2010 The Roots 2011 Foo Fighters 2012 Death Grips 2013 Arcade Fire 2014 The War on Drugs 2015 Tame Impala 2016 Bruce Springsteen and the E Street Band 2017 King Gizzard & the Lizard Wizard 2018 Pearl Jam 2019 Tool 2020 BTS

Authority control,Authority control.1
General,VIAF 1 WorldCat (via VIAF)
National libraries,United States
Other,MusicBrainz artist
