In [1]:
import pandas as pd
import numpy as np
import requests
import re
from bs4 import BeautifulSoup
from io import StringIO

In [2]:
# Pull the song catalogue from the All Things Umphreys song index page.
songlist_url = "https://allthings.umphreys.com/song/"
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(songlist_url, timeout=30)
response.raise_for_status()  # Raise an exception for bad status codes
html_content = response.text
soup = BeautifulSoup(html_content, 'html.parser')
tables = soup.find_all('table')
# Parse every <table> element into a DataFrame; the serialized tags are
# wrapped in StringIO before being handed to pd.read_html.
tables = pd.read_html(StringIO(str(tables))) if tables else []
if len(tables) < 2:
    # The catalogue is taken from the second parsed table below, so fail with
    # a clear message rather than an opaque IndexError if the page changed.
    raise ValueError(
        f"Expected at least 2 parseable tables at {songlist_url}, found {len(tables)}"
    )
# Alphabetical by song name, with a fresh 0..n-1 index.
songdata_info = tables[1].copy().sort_values(by='Song Name').reset_index(drop=True)
songdata_info.head()

Unnamed: 0,Song Name,Original Artist,Debut Date,Last Played,Times Played Live,Avg Show Gap
0,"""Brendan & Jake switch rigs""",,2010-04-24,2021-08-22,2,438.0
1,"""Mirro Memories""",,2022-11-12,2022-11-12,1,180.0
2,"""Mrs Robinson's Strut""",,2011-04-02,2011-04-02,1,1206.0
3,#19,,2011-04-02,2011-04-02,1,1206.0
4,#5,,2003-05-15,2024-12-12,173,12.91


In [3]:
# Pull the venue table from the All Things Umphreys venues page.
venues_url = "https://allthings.umphreys.com/venues/"
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(venues_url, timeout=30)
response.raise_for_status()  # Raise an exception for bad status codes
html_content = response.text
soup = BeautifulSoup(html_content, 'html.parser')
tables = soup.find_all('table')
# Parse every <table> element into a DataFrame; the serialized tags are
# wrapped in StringIO before being handed to pd.read_html.
tables = pd.read_html(StringIO(str(tables))) if tables else []
if not tables:
    # Fail with a clear message rather than an opaque IndexError below.
    raise ValueError(f"No tables could be parsed from {venues_url}")
# Promote the positional row index to a string-typed 'id' column in one
# chained expression, so re-running the cell always yields the same frame.
venue_data = tables[0].copy().reset_index(names='id').astype({'id': str})
venue_data.head()

Unnamed: 0,id,Venue Name,City,State,Country,Times Played,Last Played
0,0,1 Up - Colfax,Denver,CO,USA,1,2013-12-30
1,1,12th & Porter,Nashville,TN,USA,3,2003-10-02
2,2,20 Monroe Live,Grand Rapids,MI,USA,1,2017-02-02
3,3,203 East Marion Street,South Bend,IN,USA,1,1998-10-02
4,4,20th Century Theatre,Cincinnati,OH,USA,2,2002-11-15


In [None]:
# Song-title replacement map.
# NOTE(review): the only entry maps a title to itself and no visible cell reads
# this dict -- confirm whether it is still needed.
song_replacements = {"Mirro Memories": "Mirro Memories"}

In [41]:
# Build the working list of song names from the scraped catalogue.
songs_to_drop = ['"Mirro Memories"']  # names excluded from the list
song_changes = {"Forty-Six & 2": "Forty-Six"}  # name substitutions applied to the remaining names

# One pass: skip dropped names, then substitute any name that has an override.
song_list = [
    song_changes.get(title, title)
    for title in songdata_info['Song Name'].unique().tolist()
    if title not in songs_to_drop
]

In [39]:
# Convert every song name into the lowercase, hyphen-separated form that is
# later appended to the song base URL.
newsong_list = []
for song in song_list:
    slug = song.replace('"', 'quot')       # double quotes are spelled out
    slug = slug.replace('&', 'amp')        # ampersands are spelled out
    slug = slug.replace("...", "")         # ellipses are dropped
    slug = re.sub(r"[#():',.]", "", slug)  # remaining punctuation is stripped
    slug = slug.lower().replace(" ", "-")  # lowercase, spaces become hyphens
    newsong_list.append(slug)

In [42]:
# Scrape the play history table for a slice of songs and stack the results.
base_url = "https://allthings.umphreys.com/song/"
test_list = newsong_list[26:100]

# newsong_list is built by iterating song_list in order, so the same slice of
# song_list holds the display name for each slug. Labelling rows from the
# display name avoids reversing the slug, which is lossy (any title containing
# "amp" or "quot" would be corrupted, and hyphens/punctuation cannot be restored).
if len(song_list) != len(newsong_list):
    raise RuntimeError(
        "song_list and newsong_list are out of sync; "
        "re-run the cell that builds newsong_list"
    )
test_names = song_list[26:100]

song_history1 = []
for i, (name, song) in enumerate(zip(test_names, test_list), start=1):
    if i % 25 == 0:
        # Report progress against the slice actually being scraped.
        print(f"Scraping song {i} of {len(test_list)}")
    url = base_url + song
    # A timeout keeps one stalled request from hanging the whole loop.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Raise an exception for bad status codes
    html_content = response.text
    soup = BeautifulSoup(html_content, 'html.parser')
    tables = soup.find_all('table')
    # Serialized tags are wrapped in StringIO before being handed to pd.read_html.
    tables = pd.read_html(StringIO(str(tables))) if tables else []
    if not tables:
        # Name the offending page instead of failing with a bare IndexError.
        raise ValueError(f"No tables could be parsed from {url}")
    table = tables[0].rename(columns={'Date Played': 'Date'})
    table['song'] = name.upper()
    song_history1.append(table)

song_history = pd.concat(song_history1).reset_index(drop=True)
song_history

Scraping song 25 of 1009
Scraping song 50 of 1009


Unnamed: 0,Date,Venue,Show Gap,Set,Song Before,Song After,Footnote,song,0,1,2
0,2007-12-31,"Aragon Ballroom, Chicago, IL",1229.0,1,Der Bluten Kat >,> Der Bluten Kat,"debut, Jeff Coffin; with Jeff Coffin on saxophone",A HALF SLEEP,,,
1,2008-05-01,"Market Square, Knoxville, TN",38.0,1,Intentions Clear >,Trenchtown Rock,with Jeff Coffin on saxophone,A HALF SLEEP,,,
2,2009-06-27,"Paper Mill Island Amphitheater, Baldwinsville, NY",1397.0,1,Divisions >,> Get In The Van,"debut, John Coltrane",A LOVE SUPREME,,,
3,2011-01-30,"Ram's Head Live!, Baltimore, MD",184.0,2,Phil's Farm ->,> Blue Echo,,A LOVE SUPREME,,,
4,2011-12-29,"The Pageant, St. Louis, MO",100.0,2,Cemetery Walk II >,> Cemetery Walk II,,A LOVE SUPREME,,,
...,...,...,...,...,...,...,...,...,...,...,...
3666,2001-05-03,"The Vogue Theatre, Indianapolis, IN",51.0,1,Last Man Swerving,> Nothing Too Fancy,,BIRTHDAY,,,
3667,2003-07-08,"House of Glass, Dunsmuir, CA",342.0,2,Roulette,> 13 Days,with Sunshine Superman (Donovan) teases,BIRTHDAY,,,
3668,2009-04-18,"The Morris Performing Arts Center, South Bend, IN",759.0,1,White Man's Moccasins,End of the Road,with Steve Krojniewski on drums,BIRTHDAY,,,
3669,2018-08-11,Farm Bureau Insurance Lawn at White River Stat...,969.0,2,***,Divisions,,BIRTHDAY,,,


In [39]:
# Collect the year values offered by the setlist page.
showlist_url1 = "https://allthings.umphreys.com/setlists/umphreys-mcgee"
# A timeout keeps a stalled connection from hanging the kernel indefinitely.
response = requests.get(showlist_url1, timeout=30)
response.raise_for_status()  # Raise an exception for bad status codes
html_content = response.text
soup = BeautifulSoup(html_content, 'html.parser')
selects = soup.find_all('select')
if len(selects) < 2:
    # The second <select> is read below; fail clearly if the layout changed.
    raise ValueError(
        f"Expected at least 2 <select> elements at {showlist_url1}, found {len(selects)}"
    )
years_html = selects[1]  # presumably the year picker -- TODO confirm against the page
# Keep only all-digit option values, in reverse of page order. Using .get()
# tolerates <option> tags that carry no value attribute.
years_list = [
    value
    for value in (option.get('value', '') for option in years_html.find_all('option'))
    if value.isdigit()
][::-1]

In [46]:
# Fetch the setlist page for each selected year and collect its <div> elements.
showlist_url_base = "https://allthings.umphreys.com/setlists/umphreys-mcgee/"
test_list = years_list[0:1]
year_data = []  # TODO: not populated yet -- per-year parsing is still to be written
divs = []  # defined up front so the final display works even if test_list is empty
for year in test_list:
    url = showlist_url_base + year
    # A timeout keeps one stalled request from hanging the whole loop.
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # Raise an exception for bad status codes
    html_content = response.text
    soup = BeautifulSoup(html_content, 'html.parser')
    # Each pass replaces `divs`, so only the last year's elements survive the loop.
    divs = soup.find_all('div')

divs

[<div class="padded-body">
 <div class="container-fluid">
 <nav class="primary-navigation navbar navbar-expand-lg navbar navbar-light sticky-top text-dark" style="background-color:#fff;">
 <div class="container-fluid">
 <a class="navbar-brand" href="/">
 <img alt="All Things Umphrey's" class="d-inline-block" src="https://i.songfishapp.com/allthingsum/62c5d97030c41-logo-black.png" style="max-width:400px;max-height:45px;"/>
 </a>
 <button aria-controls="navbarSupportedContent" aria-expanded="false" aria-label="Toggle navigation" class="navbar-toggler" data-bs-target="#navbarSupportedContent" data-bs-toggle="collapse" type="button">
 <span class="navbar-toggler-icon"></span>
 </button>
 <div class="collapse navbar-collapse" id="navbarSupportedContent">
 <ul class="navbar-nav ms-auto">
 <li class="nav-item">
 <a class="nav-link" href="/">Home</a>
 </li>
 <li class="nav-item dropdown">
 <a aria-expanded="false" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" href="#" id="navbarDr