In [1]:
import requests
import json
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta
from bs4 import BeautifulSoup
from io import StringIO
import os


In [46]:
# Show every column when a DataFrame is rendered (no "..." truncation of wide frames).
pd.options.display.max_columns = None

In [3]:
# Current local date as a 'YYYY-MM-DD' string; compared lexicographically
# against the API's `showdate` strings further down.
today = datetime.now().date().isoformat()

In [8]:
# Resolve the directory this code runs from. Notebooks do not define
# __file__, so fall back to the current working directory in that case.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()

# The credentials folder sits two levels above the script directory:
#   <higher_dir>/Credentials/phish_net.txt
base_dir = os.path.dirname(script_dir)
higher_dir = os.path.dirname(base_dir)
credentials_dir = os.path.join(higher_dir, "Credentials")
with open(os.path.join(credentials_dir, "phish_net.txt"), "r") as file:
    line = file.readline().strip()  # Read the first line and remove whitespace

# Extract the API key from a line shaped like  <label>: '<key>'.
# Fail with an explicit message (that does not echo the secret) instead of a
# bare IndexError when the separator is missing.
if ": " not in line:
    raise ValueError(
        "phish_net.txt: first line must look like \"<label>: '<api key>'\""
    )
# Split only once so a key that itself contains ': ' is not truncated, and
# strip stray whitespace before removing the surrounding single quotes.
api_key = line.split(": ", 1)[1].strip().strip("'")
if not api_key:
    raise ValueError("phish_net.txt: API key on the first line is empty")

In [14]:
# Importing Song List from API
songlist_url_api = "https://api.phish.net/v5/songs.json?order_by=song&apikey=" + api_key

# Use a timeout so a stalled connection cannot hang the notebook forever.
songlist_response = requests.get(songlist_url_api, timeout=60)
# Check the HTTP status before parsing. The status is reported manually rather
# than via raise_for_status(), whose message embeds the full URL (API key
# included) and would end up in the notebook output.
if not songlist_response.ok:
    raise RuntimeError(
        f"phish.net songs request failed with HTTP {songlist_response.status_code}"
    )
songlistdata_json = songlist_response.json()

# The payload's 'data' entry holds one record per song.
song_df = pd.DataFrame(songlistdata_json['data'])
# Drop the URL/slug helper columns that are not needed downstream.
songdata_og = song_df.drop(columns=['slug','last_permalink','debut_permalink'])

songdata_og.head()

Unnamed: 0,songid,song,abbr,artist,debut,last_played,times_played,gap
0,2301,(I Can't Get No) Satisfaction,,The Rolling Stones,2022-02-25,2022-02-25,1,138
1,2947,...And Flew Away,,Trey Anastasio,2023-07-14,2023-07-14,1,79
2,1,1999,,Prince,1998-12-31,2022-08-13,4,103
3,2908,20-20 Vision,,Gene Autry,2019-12-28,2019-12-28,1,183
4,2,46 Days,,Phish,2003-01-02,2025-01-30,148,2


In [16]:
# Pulling Song Data from Phish.net Website
songlist_url = "https://phish.net/song"
# Use a timeout so a stalled connection cannot hang the notebook forever.
response = requests.get(songlist_url, timeout=60)
response.raise_for_status()  # Raise an exception for bad status codes
html_content = response.text
soup = BeautifulSoup(html_content, 'html.parser')
tables = soup.find_all('table')
# Stop with a clear message if the page layout changed and no table exists;
# otherwise the indexing below would fail with an opaque IndexError.
if not tables:
    raise ValueError(f"No <table> elements found at {songlist_url}")
tables_str = str(tables)  # Convert tables to string
tables_io = StringIO(tables_str)  # Wrap in StringIO
tables = pd.read_html(tables_io)  # `tables` is now a list of DataFrames
# The first table on the page is the song listing; order it by song name.
songdata_info = tables[0].copy().sort_values(by='Song Name').reset_index(drop=True)
songdata_info.head()

Unnamed: 0,Song Name,Original Artist,Times,Debut,Last,Gap
0,(I Can't Get No) Satisfaction,The Rolling Stones,1,2022-02-25,2022-02-25,143
1,...And Flew Away,Trey Anastasio,1,2023-07-14,2023-07-14,84
2,1999,Prince,4,1998-12-31,2022-08-13,108
3,20-20 Vision,Gene Autry,1,2019-12-28,2019-12-28,221
4,2001,"Strauss, as interpreted by Deodato",Alias of Also Sprach Zarathustra,Alias of Also Sprach Zarathustra,Alias of Also Sprach Zarathustra,Alias of Also Sprach Zarathustra


In [None]:
# Combining Song List and Song Data into final data
# Inner-join the API song list with the scraped website table on song title,
# keep the columns of interest, then normalise the column names.
songdata = (
    songdata_og
    .merge(songdata_info, left_on="song", right_on="Song Name", how="inner")
    .loc[:, ['songid', 'Song Name', 'Original Artist', 'Debut', 'last_played',
             'times_played', 'gap']]
    .rename(columns={
        'songid': 'song_id',
        'Song Name': 'song',
        'Original Artist': 'original_artist',
        'Debut': 'debut_date',
        'last_played': 'last_played',  # maps to itself; name is left unchanged
    })
)

songdata.head()

Unnamed: 0,song_id,song,original_artist,debut_date,last_played,times_played,gap
0,2301,(I Can't Get No) Satisfaction,The Rolling Stones,2022-02-25,2022-02-25,1,138
1,2947,...And Flew Away,Trey Anastasio,2023-07-14,2023-07-14,1,79
2,1,1999,Prince,1998-12-31,2022-08-13,4,103
3,2908,20-20 Vision,Gene Autry,2019-12-28,2019-12-28,1,183
4,2,46 Days,Phish,2003-01-02,2025-01-30,148,2


In [31]:
# Importing Show List from API
showlist_url = "https://api.phish.net/v5/shows/artist/phish.json?order_by=showdate&apikey=" + api_key

# Use a timeout so a stalled connection cannot hang the notebook forever.
showlist_response = requests.get(showlist_url, timeout=60)
# Check the HTTP status before parsing. The status is reported manually rather
# than via raise_for_status(), whose message embeds the full URL (API key
# included) and would end up in the notebook output.
if not showlist_response.ok:
    raise RuntimeError(
        f"phish.net shows request failed with HTTP {showlist_response.status_code}"
    )
showlistdata_json = showlist_response.json()
showlist_df = pd.DataFrame(showlistdata_json['data'])

# `showdate` and `today` are both 'YYYY-MM-DD' strings, so plain string
# comparison orders them chronologically.
past_df = showlist_df[showlist_df['showdate'] < today]
# Keep only the single earliest show dated today or later.
future_df = showlist_df[showlist_df['showdate'] >= today].sort_values('showdate').head(1)
phish_showlist_df = pd.concat([past_df, future_df])

phish_showlist_df.head(1)

Unnamed: 0,showid,showyear,showmonth,showday,showdate,permalink,exclude_from_stats,venueid,setlist_notes,venue,city,state,country,artistid,artist_name,tourid,tour_name,created_at,updated_at
0,1251168326,1983,10,30,1983-10-30,https://phish.net/setlists/phish-october-30-19...,1,7,Throughout most of Phish history this was unde...,Harris-Millis Cafeteria - University of Vermont,Burlington,VT,USA,1,Phish,61.0,Not Part of a Tour,,


In [34]:
# Creating Venue Dataset
# One row per distinct venue record seen in the show list, ordered by venue id.
venuedata = (
    phish_showlist_df[['venueid', 'venue', 'city', 'state', 'country']]
    .drop_duplicates()
    .sort_values(by='venueid')
    .reset_index(drop=True)
)

In [36]:
# Creating Tour Dataset
# One row per distinct (tourid, tour_name) pair, ordered by tour id.
tourdata = (
    phish_showlist_df[['tourid', 'tour_name']]
    .drop_duplicates()
    .sort_values(by='tourid')
    .reset_index(drop=True)
)

In [43]:
# Creating Show Dataset
# Number the shows 1..N in chronological order.
# The number is derived from the row's position AFTER sorting, not from the
# index labels inherited from `showlist_df`: those labels only match the
# chronological position if the API response was already perfectly ordered and
# the filtering/concat upstream left no gaps.
showdata = (
    phish_showlist_df[['showid', 'showdate', 'venueid', 'tourid', 'exclude_from_stats', 'setlist_notes']]
    # Stable sort keeps the incoming order for shows sharing a date, so the
    # numbering is deterministic.
    .sort_values(by='showdate', kind='stable')
    .reset_index(drop=True)                # discard the inherited labels
    .reset_index(names='show_number')      # 0-based sorted position as first column
    .assign(show_number=lambda x: x['show_number'] + 1)
)
# Go through nullable Int64 first so ids are rendered as '61' rather than
# '61.0' when cast to string.
# NOTE(review): a missing tourid presumably becomes the literal string '<NA>'
# after this cast — confirm that is acceptable for consumers of the CSV.
showdata['tourid'] = showdata['tourid'].astype('Int64').astype(str)
showdata.tail()

Unnamed: 0,show_number,showid,showdate,venueid,tourid,exclude_from_stats,setlist_notes
2162,2163,1718730861,2025-01-29,1481,208,0,<p>Trey teased Mike&#39;s Song in Weekapaug Gr...
2163,2164,1718730894,2025-01-30,1481,208,0,<p>Page teased We&#39;re Off to See the Wizard...
2164,2165,1718730936,2025-01-31,1481,208,0,<p>Wolfman&#39;s Brother contained Mike&#39;s ...
2165,2166,1718730981,2025-02-01,1481,208,0,"<p>Before Bathtub Gin, Trey said they would pl..."
2166,2167,1737486654,2025-04-18,1570,209,0,


In [47]:
# Pulling Setlist Data from API
setlist_url = "https://api.phish.net/v5/setlists.json?apikey=" + api_key

# Use a timeout so a stalled connection cannot hang the notebook forever
# (generous, since this request is not filtered to a single show or artist).
setlist_response = requests.get(setlist_url, timeout=120)
# Check the HTTP status before parsing. The status is reported manually rather
# than via raise_for_status(), whose message embeds the full URL (API key
# included) and would end up in the notebook output.
if not setlist_response.ok:
    raise RuntimeError(
        f"phish.net setlists request failed with HTTP {setlist_response.status_code}"
    )
setlistdata_json = setlist_response.json()
setlist_df = pd.DataFrame(setlistdata_json['data'])
setlist_df.head()

Unnamed: 0,showid,showdate,permalink,showyear,uniqueid,meta,reviews,exclude,setlistnotes,soundcheck,songid,position,transition,footnote,set,isjam,isreprise,isjamchart,jamchart_description,tracktime,gap,tourid,tourname,tourwhen,song,nickname,slug,is_original,venueid,venue,city,state,country,trans_mark,artistid,artist_slug,artist_name
0,1326251770,1982-12-07,https://phish.net/setlists/trey-anastasio-dece...,1982,181509,Space Antelope,0,0,"This list is likely incomplete, and the date m...",,1750,1,2,,1,0,0,0,,,0,61,Not Part of a Tour,No Tour,Lifespace,Lifespace,lifespace,0,1140,The Taft School,Watertown,CT,USA,>,2,trey-anastasio,Trey Anastasio
1,1326251770,1982-12-07,https://phish.net/setlists/trey-anastasio-dece...,1982,181510,Space Antelope,0,0,"This list is likely incomplete, and the date m...",,16,2,1,,1,0,0,0,,,0,61,Not Part of a Tour,No Tour,All Along the Watchtower,All Along the Watchtower,all-along-the-watchtower,0,1140,The Taft School,Watertown,CT,USA,",",2,trey-anastasio,Trey Anastasio
2,1326251770,1982-12-07,https://phish.net/setlists/trey-anastasio-dece...,1982,181511,Space Antelope,0,0,"This list is likely incomplete, and the date m...",,1618,3,1,,1,0,0,0,,,0,61,Not Part of a Tour,No Tour,Franklin's Tower,Franklin's Tower,franklins-tower,0,1140,The Taft School,Watertown,CT,USA,",",2,trey-anastasio,Trey Anastasio
3,1326251770,1982-12-07,https://phish.net/setlists/trey-anastasio-dece...,1982,199504,Space Antelope,0,0,"This list is likely incomplete, and the date m...",,1752,4,1,,1,0,0,0,,,0,61,Not Part of a Tour,No Tour,Goin' Down the Road,Goin' Down the Road,goin-down-the-road,0,1140,The Taft School,Watertown,CT,USA,",",2,trey-anastasio,Trey Anastasio
4,1326251770,1982-12-07,https://phish.net/setlists/trey-anastasio-dece...,1982,199505,Space Antelope,0,0,"This list is likely incomplete, and the date m...",,1751,5,1,,1,0,0,0,,,0,61,Not Part of a Tour,No Tour,Hemispheres,Hemispheres,hemispheres,0,1140,The Taft School,Watertown,CT,USA,",",2,trey-anastasio,Trey Anastasio


In [51]:
# Creating Transition Data
# Lookup table of each distinct transition code and its printed mark.
transition_data = (
    setlist_df[['transition', 'trans_mark']]
    .drop_duplicates()
    .sort_values(by='transition')
    .reset_index(drop=True)
)

In [53]:
# Creating Setlist Data
# Keep only the per-song setlist fields; the show, venue and tour descriptive
# columns are left out of this frame.
setlistdata = setlist_df.loc[:, [
    'showid', 'uniqueid', 'songid', 'set', 'position', 'transition',
    'isreprise', 'isjam', 'isjamchart', 'jamchart_description',
    'tracktime', 'gap', 'is_original', 'soundcheck', 'footnote', 'exclude',
]].copy()
setlistdata.head()

Unnamed: 0,showid,uniqueid,songid,set,position,transition,isreprise,isjam,isjamchart,jamchart_description,tracktime,gap,is_original,soundcheck,footnote,exclude
0,1326251770,181509,1750,1,1,2,0,0,0,,,0,0,,,0
1,1326251770,181510,16,1,2,1,0,0,0,,,0,0,,,0
2,1326251770,181511,1618,1,3,1,0,0,0,,,0,0,,,0
3,1326251770,199504,1752,1,4,1,0,0,0,,,0,0,,,0
4,1326251770,199505,1751,1,5,1,0,0,0,,,0,0,,,0


In [54]:
# Write each dataset to <base_dir>/Data/Phish as CSV, without the index column.
save_path = os.path.join(base_dir, "Data", "Phish")
# Create the output folder if it is missing so the export does not fail on a
# fresh checkout.
os.makedirs(save_path, exist_ok=True)

# NOTE(review): `tourdata` is built earlier in this notebook but is not
# written here — confirm whether that omission is intentional.
songdata.to_csv(os.path.join(save_path, "songdata.csv"), index=False)
venuedata.to_csv(os.path.join(save_path, "venuedata.csv"), index=False)
showdata.to_csv(os.path.join(save_path, "showdata.csv"), index=False)
transition_data.to_csv(os.path.join(save_path, "transition_data.csv"), index=False)
setlistdata.to_csv(os.path.join(save_path, "setlistdata.csv"), index=False)