In [1]:
import pandas as pd
import os
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [2]:
# Lift the column cap so wide DataFrames are displayed without eliding columns.
pd.options.display.max_columns = None

In [3]:
# Read the clock once so all three 'YYYY-MM-DD' strings are derived from the same
# instant; separate today()/now() calls could straddle midnight and disagree.
_now = datetime.now()
today = _now.strftime('%Y-%m-%d')
# relativedelta subtracts calendar years (leap-year aware) rather than a fixed day count.
todayminus1year = (_now - relativedelta(years=1)).strftime('%Y-%m-%d')
todayminus2year = (_now - relativedelta(years=2)).strftime('%Y-%m-%d')

In [4]:
# Locate the data folder relative to this file. `__file__` is only defined when the
# code runs as a script; in an interactive kernel fall back to the working directory.
script_dir = (
    os.path.dirname(os.path.abspath(globals()["__file__"]))
    if "__file__" in globals()
    else os.getcwd()
)
# Data lives in <parent of script_dir>/Data/Widespread_Panic.
base_dir = os.path.dirname(script_dir)
data_dir = os.path.join(base_dir, "Data", "Widespread_Panic")

In [5]:
# Load the three input tables (songs, shows, setlists) from the data directory.
songdata, showdata, setlistdata = (
    pd.read_csv(os.path.join(data_dir, csv_name))
    for csv_name in ("songdata.csv", "showdata.csv", "setlistdata.csv")
)

In [6]:
# Highest overall show index; later cells measure each song's current gap against it.
last_show = showdata['show_index_overall'].max()

# Parse mm/dd/yy dates. Values that do not match the format become NaT instead of raising.
showdata['date'] = pd.to_datetime(showdata['date'], errors='coerce', format='%m/%d/%y')
showdata.tail()

Unnamed: 0,date,year,month,day,weekday,date_ec,venue,city,state,show_index_overall,show_index_withinyear,run_index,venue_full,link
3210,2024-05-25,2024,5,25,Saturday,05/25/24,RADIANE AMPHITHEATER AT MEMPHIS BOTANIC GARDEN,MEMPHIS,TN,3211,13,2485,RADIANE AMPHITHEATER AT MEMPHIS BOTANIC GARDEN...,http://everydaycompanion.com/setlists/20240525...
3211,2024-06-20,2024,6,20,Thursday,06/20/24,EMPOWER FIELD AT MILE HIGH,DENVER,CO,3212,14,2486,"EMPOWER FIELD AT MILE HIGH, DENVER, CO",http://everydaycompanion.com/setlists/20240620...
3212,2024-06-21,2024,6,21,Friday,06/21/24,RED ROCKS,MORRISON,CO,3213,15,2487,"RED ROCKS, MORRISON, CO",http://everydaycompanion.com/setlists/20240621...
3213,2024-06-22,2024,6,22,Saturday,06/22/24,RED ROCKS,MORRISON,CO,3214,16,2487,"RED ROCKS, MORRISON, CO",http://everydaycompanion.com/setlists/20240622...
3214,2024-06-23,2024,6,23,Sunday,06/23/24,RED ROCKS,MORRISON,CO,3215,17,2487,"RED ROCKS, MORRISON, CO",http://everydaycompanion.com/setlists/20240623...


In [7]:
# Preview the first five rows of the song table.
songdata.head(5)

Unnamed: 0,song,code,ftp,ltp,times_played,aka
0,???,???,06/06/87,04/23/93,3,
1,One Kind Favor,1FAVOR,07/18/97,07/24/22,54,
2,One Arm Steve,1STEVE,04/18/98,06/21/24,309,
3,Second Skin,2NDSKN,03/24/05,04/15/24,165,
4,Tonight's the Night,2NNGHT,05/14/03,04/26/05,9,


In [8]:
# Preview the last five setlist rows (tail() defaults to five).
setlistdata.tail()

Unnamed: 0,song_name,set,song_index_set,song_index_show,into,song_notes_key,notes_id,link,song_note_detail
62115,BLUE INDIAN,2:,10,20,1,,0.0,http://everydaycompanion.com/setlists/20240623...,
62116,LAWYERS GUNS AND MONEY,2:,11,21,0,,0.0,http://everydaycompanion.com/setlists/20240623...,
62117,LIFE AS A TREE,E:,1,22,0,,0.0,http://everydaycompanion.com/setlists/20240623...,
62118,PIGEONS,E:,2,23,0,,0.0,http://everydaycompanion.com/setlists/20240623...,
62119,POSTCARD,E:,3,24,0,,0.0,http://everydaycompanion.com/setlists/20240623...,


In [9]:
# Show only the first show row.
showdata.iloc[:1]

Unnamed: 0,date,year,month,day,weekday,date_ec,venue,city,state,show_index_overall,show_index_withinyear,run_index,venue_full,link
0,NaT,1985,0,0,,??/??/85,"A-FRAME, WEYMANDA COURT",ATHENS,GA,1,1,1,"A-FRAME, WEYMANDA COURT, ATHENS, GA",http://everydaycompanion.com/setlists/19850000...


In [10]:
# Attach show metadata to every setlist row (joined on the show's link), then order
# each song's performances by overall show index so consecutive rows are consecutive plays.
songs_and_shows = (
    pd.merge(setlistdata, showdata, on='link', how='left')
    .sort_values(['song_name', 'show_index_overall'])
    .reset_index(drop=True)
)

# gap = number of shows since the same song's previous performance.
# GroupBy.diff() already leaves NaN on each song's first row, so no separate
# pass is needed to blank out first performances.
songs_and_shows['gap'] = songs_and_shows.groupby('song_name')['show_index_overall'].diff()
songs_and_shows.head()

Unnamed: 0,song_name,set,song_index_set,song_index_show,into,song_notes_key,notes_id,link,song_note_detail,date,year,month,day,weekday,date_ec,venue,city,state,show_index_overall,show_index_withinyear,run_index,venue_full,gap
0,(DON'T FEAR) THE REAPER,2:,11,19,1,,,http://everydaycompanion.com/setlists/20181028...,with Ben Draper on cowbell,2018-10-28,2018,10,28,Sunday,10/28/18,PARK THEATER,LAS VEGAS,NV,3027,32,2423,"PARK THEATER, LAS VEGAS, NV",
1,(DON'T GO BACK TO) ROCKVILLE,2:,12,21,0,,,http://everydaycompanion.com/setlists/20091230...,"with Mike Mills on vocals, Randall Bramblett ...",2009-12-30,2009,12,30,Wednesday,12/30/09,PHILIPS ARENA,ATLANTA,GA,2490,53,2082,"PHILIPS ARENA, ATLANTA, GA",
2,(SITTIN' ON) THE DOCK OF THE BAY,E:,2,21,1,,0.0,http://everydaycompanion.com/setlists/20051229...,,2005-12-29,2005,12,29,Thursday,12/29/05,THE ROXY,ATLANTA,GA,2185,101,1877,"THE ROXY, ATLANTA, GA",
3,(SITTIN' ON) THE DOCK OF THE BAY,1:,1,1,0,,,http://everydaycompanion.com/setlists/20051231...,,2005-12-31,2005,12,31,Saturday,12/31/05,PHILIPS ARENA,ATLANTA,GA,2187,103,1878,"PHILIPS ARENA, ATLANTA, GA",2.0
4,(SITTIN' ON) THE DOCK OF THE BAY,E:,1,21,0,,,http://everydaycompanion.com/setlists/20060708...,with John Keane on guitar/pedal steel,2006-07-08,2006,7,8,Saturday,07/08/06,GREEK THEATRE,BERKELEY,CA,2206,19,1888,"GREEK THEATRE, BERKELEY, CA",19.0


In [11]:
# Per-song lifetime summary: play count, first/last show index and gap statistics.
# Named aggregation yields flat column names directly, so no MultiIndex
# flatten-and-rename step is required. Statistics are rounded to 2 decimals.
my_song_data = (
    songs_and_shows.groupby('song_name')
    .agg(
        times_played_total=('show_index_overall', 'count'),
        debut=('show_index_overall', 'min'),
        last_played=('show_index_overall', 'max'),
        avg_gap=('gap', 'mean'),
        med_gap=('gap', 'median'),
        std_gap=('gap', 'std'),
    )
    .reset_index()
    .round(2)
)

# Shows elapsed between the song's last performance and the most recent show.
my_song_data['current_gap'] = last_show - my_song_data['last_played']

# Translate the debut / last-played show indices into their show dates.
_show_dates = showdata[['show_index_overall', 'date']]
my_song_data = (
    my_song_data
    .merge(_show_dates.rename(columns={'show_index_overall': 'debut', 'date': 'debut_date'}),
           on='debut', how='left')
    .merge(_show_dates.rename(columns={'show_index_overall': 'last_played', 'date': 'ltp_date'}),
           on='last_played', how='left')
)[['song_name', 'times_played_total', 'debut_date', 'ltp_date', 'current_gap', 'avg_gap', 'med_gap', 'std_gap']]

# z-score of the current gap against the song's own gap distribution.
# A zero standard deviation would give +/-inf, so treat it as undefined (NaN);
# songs with fewer than two gaps already have a NaN std.
_usable_std = my_song_data['std_gap'].where(my_song_data['std_gap'] > 0)
my_song_data['gap_zscore'] = (my_song_data['current_gap'] - my_song_data['avg_gap']) / _usable_std

# Dates are stored as mm/dd/yy strings in this table.
my_song_data['debut_date'] = pd.to_datetime(my_song_data['debut_date']).dt.strftime('%m/%d/%y')
my_song_data['ltp_date'] = pd.to_datetime(my_song_data['ltp_date']).dt.strftime('%m/%d/%y')

my_song_data.head()

Unnamed: 0,song_name,times_played_total,debut_date,ltp_date,current_gap,avg_gap,med_gap,std_gap,gap_zscore
0,(DON'T FEAR) THE REAPER,1,10/28/18,10/28/18,188,,,,
1,(DON'T GO BACK TO) ROCKVILLE,1,12/30/09,12/30/09,725,,,,
2,(SITTIN' ON) THE DOCK OF THE BAY,4,12/29/05,03/29/14,454,192.0,19.0,314.48,0.833121
3,1 X 1,147,03/23/95,05/27/23,37,13.71,10.0,15.18,1.534256
4,A HARD RAIN'S A-GONNA FALL,20,12/31/15,03/22/24,13,16.68,15.0,13.22,-0.278366


In [12]:
# ltp_date is held as an mm/dd/yy string; parse it back to datetimes so it can be
# compared with a cutoff (unparseable values become NaT and fail the comparison).
my_song_data['ltp_date'] = pd.to_datetime(my_song_data['ltp_date'], format='%m/%d/%y', errors='coerce')

# Calendar-accurate cutoff: relativedelta accounts for leap years, whereas a fixed
# 3*365-day offset drifts by a day.
three_years_ago = datetime.today() - relativedelta(years=3)

# Keep songs played more than 10 times overall and last played after the cutoff,
# ranked by how unusual the current gap is (highest z-score first).
_played_often = my_song_data['times_played_total'] > 10
_played_recently = my_song_data['ltp_date'] > three_years_ago
ck_plus = (
    my_song_data[_played_often & _played_recently]
    .sort_values(by='gap_zscore', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['debut_date', 'std_gap', 'gap_zscore'])
)

# How far the current gap exceeds the song's average / median gap.
ck_plus['current_minus_avg'] = ck_plus['current_gap'] - ck_plus['avg_gap']
ck_plus['current_minus_med'] = ck_plus['current_gap'] - ck_plus['med_gap']

ck_plus.head()

Unnamed: 0,song_name,times_played_total,ltp_date,current_gap,avg_gap,med_gap,current_minus_avg,current_minus_med
0,WEIGHT OF THE WORLD,588,2022-10-23,64,4.43,3.0,59.57,61.0
1,SELL SELL,76,2022-09-18,69,6.4,5.0,62.6,64.0
2,STEVEN'S CAT,52,2022-09-16,71,6.08,4.0,64.92,67.0
3,IT AIN'T NO USE,365,2022-08-11,77,7.52,5.0,69.48,72.0
4,YOU SHOULD BE GLAD,172,2023-02-09,54,6.26,5.0,47.74,49.0


In [14]:
# Performances from shows dated after the two-year cutoff string.
jojos_notebook_data = (
    songs_and_shows.loc[
        songs_and_shows['date'] > todayminus2year,
        ['song_name', 'show_index_overall', 'date', 'gap'],
    ]
    .reset_index(drop=True)
)

# NOTE(review): the count column is named 'times_played_in_last_year', but the window
# above is two years (todayminus2year). The name is kept because it becomes a header
# in the saved CSV; confirm whether the window or the name is the intended one.
# Only the aggregates that reach the output are computed (count, last index, mean and
# median gap), via named aggregation so the columns come out flat.
jojos_notebook = (
    jojos_notebook_data.groupby('song_name')
    .agg(
        times_played_in_last_year=('show_index_overall', 'count'),
        last_played=('show_index_overall', 'max'),
        avg_gap=('gap', 'mean'),
        med_gap=('gap', 'median'),
    )
    .reset_index()
    .round(2)
)

# Shows elapsed between the song's last performance and the most recent show.
jojos_notebook['current_gap'] = last_show - jojos_notebook['last_played']

# Replace the last-played show index with that show's date.
jojos_notebook = (
    jojos_notebook
    .merge(
        showdata[['show_index_overall', 'date']]
        .rename(columns={'show_index_overall': 'last_played', 'date': 'ltp_date'}),
        on='last_played', how='left',
    )
)[['song_name', 'times_played_in_last_year', 'ltp_date', 'current_gap', 'avg_gap', 'med_gap']]

# Keep songs whose current gap is more than 3 shows, most-played first.
jojos_notebook = (
    jojos_notebook[jojos_notebook['current_gap'] > 3]
    .sort_values(by='times_played_in_last_year', ascending=False)
    .reset_index(drop=True)
)

jojos_notebook.head()

Unnamed: 0,song_name,times_played_in_last_year,ltp_date,current_gap,avg_gap,med_gap
0,HENRY PARSONS DIED,14,2024-05-24,5,3.71,4.0
1,ROCK,14,2024-05-25,4,3.79,5.0
2,WONDERING,13,2024-04-25,6,4.15,5.0
3,DISCO,13,2024-05-25,4,3.92,4.0
4,DINER,12,2024-05-25,4,4.33,4.5


In [15]:
# Saving all datasets to CSV
# Write next to the input files: reuse data_dir, resolved when the inputs were loaded,
# instead of re-deriving the same path, so load and save locations cannot drift apart.
save_path = data_dir
ck_plus.to_csv(os.path.join(save_path, "ckplus_wsp.csv"), index=False)
jojos_notebook.to_csv(os.path.join(save_path, "jojos_notebook.csv"), index=False)