In [9]:
import pandas as pd
import os
from datetime import datetime, timedelta, date
from dateutil.relativedelta import relativedelta

In [10]:
# Show every column when a DataFrame is rendered (None removes the column cap).
pd.options.display.max_columns = None

In [11]:
# ISO-formatted (YYYY-MM-DD) strings for today and for the same calendar day
# one and two years back. None of the other cells visible in this notebook
# reference these three names.
today = f"{datetime.today():%Y-%m-%d}"
todayminus1year = f"{datetime.now() - relativedelta(years=1):%Y-%m-%d}"
todayminus2year = f"{datetime.now() - relativedelta(years=2):%Y-%m-%d}"

In [14]:
# Locate the input data relative to where this code lives.
# `__file__` exists when the code runs as a script; in a notebook kernel it is
# not defined, so the current working directory is used instead.
if '__file__' in globals():
    script_dir = os.path.dirname(os.path.abspath(__file__))
else:
    script_dir = os.getcwd()

# The data folder sits beside (not inside) the script/notebook folder.
base_dir = os.path.split(script_dir)[0]
data_dir = os.path.join(base_dir, *("Data", "WSP", "From Web"))

In [15]:
# Song catalogue, with titles normalised through str.title(). Note that
# str.title() also capitalises the letter after an apostrophe
# (e.g. "Tonight'S The Night").
songdata = (
    pd.read_csv(os.path.join(data_dir, "songdata.csv"))
    .assign(song=lambda frame: frame['song'].str.title())
)

# One row per show.
showdata = pd.read_csv(os.path.join(data_dir, "showdata.csv"))

In [16]:
# Load the setlists and keep only the first appearance of each song per show.
# Steps, in order:
#   1. drop rows that have no song name;
#   2. title-case the song name (same normalisation as songdata['song']);
#   3. order by show link, song, and position in the show so that cumcount()
#      numbers a song's appearances within a show in playing order;
#   4. `isreprise` = 0 for the first appearance, 1+ for later ones;
#   5. keep the first appearances and restore show/playing order.
setlistdata = (
    pd.read_csv(os.path.join(data_dir, "setlistdata.csv"))
    .dropna(subset='song_name')
    .assign(song_name=lambda frame: frame['song_name'].str.title())
    .sort_values(by=['link', 'song_name', 'song_index_show'])
    .reset_index(drop=True)
    .assign(isreprise=lambda frame: frame.groupby(['link', 'song_name']).cumcount().astype(int))
    .loc[lambda frame: frame['isreprise'] == 0]
    .sort_values(by=['link', 'song_index_show'])
    .reset_index(drop=True)
)

In [17]:
# Parse the MM/DD/YY show dates into datetimes; values that do not match the
# format become NaT (errors='coerce') instead of raising.
parsed_show_dates = pd.to_datetime(showdata['date'], errors='coerce', format='%m/%d/%y')
showdata['date'] = parsed_show_dates

# Highest overall show index; later cells measure each song's current gap
# against this value.
last_show = showdata['show_index_overall'].max()
print(last_show)

showdata.tail(5)

3215


Unnamed: 0,date,year,month,day,weekday,date_ec,venue,city,state,show_index_overall,show_index_withinyear,run_index,venue_full,link
3210,2024-05-25,2024,5,25,Saturday,05/25/24,RADIANE AMPHITHEATER AT MEMPHIS BOTANIC GARDEN,MEMPHIS,TN,3211,13,2485,RADIANE AMPHITHEATER AT MEMPHIS BOTANIC GARDEN...,http://www.everydaycompanion.com/setlists/2024...
3211,2024-06-20,2024,6,20,Thursday,06/20/24,EMPOWER FIELD AT MILE HIGH,DENVER,CO,3212,14,2486,"EMPOWER FIELD AT MILE HIGH, DENVER, CO",http://www.everydaycompanion.com/setlists/2024...
3212,2024-06-21,2024,6,21,Friday,06/21/24,RED ROCKS,MORRISON,CO,3213,15,2487,"RED ROCKS, MORRISON, CO",http://www.everydaycompanion.com/setlists/2024...
3213,2024-06-22,2024,6,22,Saturday,06/22/24,RED ROCKS,MORRISON,CO,3214,16,2487,"RED ROCKS, MORRISON, CO",http://www.everydaycompanion.com/setlists/2024...
3214,2024-06-23,2024,6,23,Sunday,06/23/24,RED ROCKS,MORRISON,CO,3215,17,2487,"RED ROCKS, MORRISON, CO",http://www.everydaycompanion.com/setlists/2024...


In [18]:
# Preview the first five rows of the song catalogue.
songdata.iloc[:5]

Unnamed: 0,song,code,ftp,ltp,times_played,aka
0,???,???,06/06/87,04/23/93,3,
1,One Kind Favor,1FAVOR,07/18/97,07/24/22,54,
2,One Arm Steve,1STEVE,04/18/98,06/21/24,309,
3,Second Skin,2NDSKN,03/24/05,04/15/24,165,
4,Tonight'S The Night,2NNGHT,05/14/03,04/26/05,9,


In [19]:
# Preview the last five setlist rows (tail() defaults to 5 rows).
setlistdata.tail()

Unnamed: 0,song_name,set,song_index_set,song_index_show,into,song_notes_key,notes_id,link,song_note_detail,isreprise
59842,Blue Indian,2:,10,20,1,,0.0,http://www.everydaycompanion.com/setlists/2024...,,0
59843,Lawyers Guns And Money,2:,11,21,0,,0.0,http://www.everydaycompanion.com/setlists/2024...,,0
59844,Life As A Tree,E:,1,22,0,,0.0,http://www.everydaycompanion.com/setlists/2024...,,0
59845,Pigeons,E:,2,23,0,,0.0,http://www.everydaycompanion.com/setlists/2024...,,0
59846,Postcard,E:,3,24,0,,0.0,http://www.everydaycompanion.com/setlists/2024...,,0


In [21]:
# Attach show-level columns to every setlist row (matched on the show link),
# then order each song's performances by overall show index.
setlist_by_song = (
    setlistdata
    .merge(showdata, how='left', on='link')
    .sort_values(['song_name', 'show_index_overall'])
    .reset_index(drop=True)
)

# gap = difference in overall show index between a performance and the song's
# previous performance. diff() already leaves NaN on the first row of every
# group, so a song's first performance has no gap and needs no extra reset.
setlist_by_song['gap'] = (
    setlist_by_song
    .groupby('song_name')['show_index_overall']
    .diff()
)

setlist_by_song.head(5)

Unnamed: 0,song_name,set,song_index_set,song_index_show,into,song_notes_key,notes_id,link,song_note_detail,isreprise,date,year,month,day,weekday,date_ec,venue,city,state,show_index_overall,show_index_withinyear,run_index,venue_full,gap
0,(Don'T Fear) The Reaper,2:,11,19,1,,,http://www.everydaycompanion.com/setlists/2018...,with Ben Draper on cowbell,0,10/28/18,2018,10,28,Sunday,10/28/18,PARK THEATER,LAS VEGAS,NV,3027,32,2423,"PARK THEATER, LAS VEGAS, NV",
1,(Don'T Go Back To) Rockville,2:,12,21,0,,,http://www.everydaycompanion.com/setlists/2009...,"with Mike Mills on vocals, Randall Bramblett ...",0,12/30/09,2009,12,30,Wednesday,12/30/09,PHILIPS ARENA,ATLANTA,GA,2490,53,2082,"PHILIPS ARENA, ATLANTA, GA",
2,(Sittin' On) The Dock Of The Bay,E:,2,21,1,,0.0,http://www.everydaycompanion.com/setlists/2005...,,0,12/29/05,2005,12,29,Thursday,12/29/05,THE ROXY,ATLANTA,GA,2185,101,1877,"THE ROXY, ATLANTA, GA",
3,(Sittin' On) The Dock Of The Bay,1:,1,1,0,,,http://www.everydaycompanion.com/setlists/2005...,,0,12/31/05,2005,12,31,Saturday,12/31/05,PHILIPS ARENA,ATLANTA,GA,2187,103,1878,"PHILIPS ARENA, ATLANTA, GA",2.0
4,(Sittin' On) The Dock Of The Bay,E:,1,21,0,,,http://www.everydaycompanion.com/setlists/2006...,with John Keane on guitar/pedal steel,0,07/08/06,2006,7,8,Saturday,07/08/06,GREEK THEATRE,BERKELEY,CA,2206,19,1888,"GREEK THEATRE, BERKELEY, CA",19.0


In [15]:
# Preview the first five rows of the song catalogue (head() defaults to 5 rows).
songdata.head()

Unnamed: 0,song,code,ftp,ltp,times_played,aka
0,???,???,06/06/87,04/23/93,3,
1,One Kind Favor,1FAVOR,07/18/97,07/24/22,54,
2,One Arm Steve,1STEVE,04/18/98,06/21/24,309,
3,Second Skin,2NDSKN,03/24/05,04/15/24,165,
4,Tonight's the Night,2NNGHT,05/14/03,04/26/05,9,


In [43]:
# Per-song summary over the full show history in `setlist_by_song`:
#   times_played_total - number of rows (performances) for the song
#   debut / last_played - lowest / highest overall show index for the song
#   *_gap               - statistics of the `gap` column
# All numeric columns are rounded to 2 decimals before anything else is derived.
my_song_data = (
    setlist_by_song
    .groupby('song_name')
    .agg(
        times_played_total=('show_index_overall', 'count'),
        debut=('show_index_overall', 'min'),
        last_played=('show_index_overall', 'max'),
        min_gap=('gap', 'min'),
        max_gap=('gap', 'max'),
        avg_gap=('gap', 'mean'),
        med_gap=('gap', 'median'),
        std_gap=('gap', 'std'),
    )
    .reset_index()
    .round(2)
)

# Shows elapsed between the song's last performance and the latest show index.
my_song_data['current_gap'] = last_show - my_song_data['last_played']

# Translate the debut / last-played show indices into dates by looking them up
# in showdata, then keep only the reporting columns.
my_song_data = (
    my_song_data
    .merge(
        showdata[['show_index_overall', 'date']]
        .rename(columns={'show_index_overall': 'debut', 'date': 'debut_date'}),
        on='debut',
        how='left',
    )
    .merge(
        showdata[['show_index_overall', 'date']]
        .rename(columns={'show_index_overall': 'last_played', 'date': 'ltp_date'}),
        on='last_played',
        how='left',
    )
)[['song_name', 'times_played_total', 'debut_date', 'ltp_date',
   'current_gap', 'avg_gap', 'med_gap', 'std_gap']]

# How many standard deviations the current gap sits above the song's average
# gap. NaN when the song has no gap statistics (e.g. played only once).
my_song_data['gap_zscore'] = (
    (my_song_data['current_gap'] - my_song_data['avg_gap']) / my_song_data['std_gap']
)

# Cutoff used by the following cell to keep recently played songs.
# Calendar-accurate: subtracting 5*365 days would ignore leap days and land
# one or two days after the true five-year mark.
five_years_ago = date.today() - relativedelta(years=5)

# Convert to plain datetime.date objects so the column compares directly
# against `five_years_ago`.
my_song_data['ltp_date'] = pd.to_datetime(my_song_data['ltp_date'], format='%m/%d/%y').dt.date

my_song_data.head()

Unnamed: 0,song_name,times_played_total,debut_date,ltp_date,current_gap,avg_gap,med_gap,std_gap,gap_zscore
0,(Don'T Fear) The Reaper,1,10/28/18,2018-10-28,188,,,,
1,(Don'T Go Back To) Rockville,1,12/30/09,2009-12-30,725,,,,
2,(Sittin' On) The Dock Of The Bay,4,12/29/05,2014-03-29,454,192.0,19.0,314.48,0.833121
3,1 X 1,147,03/23/95,2023-05-27,37,13.71,10.0,15.18,1.534256
4,A Hard Rain'S A-Gonna Fall,20,12/31/15,2024-03-22,13,16.68,15.0,13.22,-0.278366


In [44]:
# Songs played more than 10 times whose last performance is after
# `five_years_ago`, ordered from the most to the least overdue by gap z-score.
played_often = my_song_data['times_played_total'] > 10
played_recently = my_song_data['ltp_date'] > five_years_ago

ck_plus = (
    my_song_data
    .loc[played_often & played_recently]
    .sort_values(by='gap_zscore', ascending=False)
    .reset_index(drop=True)
    .drop(columns=['debut_date', 'std_gap', 'gap_zscore'])
    .assign(
        # How far the current gap exceeds the song's average / median gap.
        current_minus_avg=lambda frame: frame['current_gap'] - frame['avg_gap'],
        current_minus_med=lambda frame: frame['current_gap'] - frame['med_gap'],
    )
)

ck_plus.head()

Unnamed: 0,song_name,times_played_total,ltp_date,current_gap,avg_gap,med_gap,current_minus_avg,current_minus_med
0,Weight Of The World,588,2022-10-23,64,4.43,3.0,59.57,61.0
1,Sell Sell,76,2022-09-18,69,6.4,5.0,62.6,64.0
2,Steven'S Cat,52,2022-09-16,71,6.08,4.0,64.92,67.0
3,It Ain'T No Use,365,2022-08-11,77,7.52,5.0,69.48,72.0
4,You Should Be Glad,171,2023-02-09,54,6.3,5.0,47.7,49.0


# Split

In [48]:
# Peek at a single row to recall the available columns.
setlist_by_song.iloc[:1]

Unnamed: 0,song_name,set,song_index_set,song_index_show,into,song_notes_key,notes_id,link,song_note_detail,isreprise,date,year,month,day,weekday,date_ec,venue,city,state,show_index_overall,show_index_withinyear,run_index,venue_full,gap
0,(Don'T Fear) The Reaper,2:,11,19,1,,,http://www.everydaycompanion.com/setlists/2018...,with Ben Draper on cowbell,0,10/28/18,2018,10,28,Sunday,10/28/18,PARK THEATER,LAS VEGAS,NV,3027,32,2423,"PARK THEATER, LAS VEGAS, NV",


In [50]:
# NOTE(review): despite its name, `one_year_ago` lies 2 * 366 = 732 days
# (about two years) in the past, and the statistics built from this window are
# later labelled `times_played_in_last_year`. Confirm which window is intended;
# the existing behaviour is kept here.
one_year_ago = date.today() - timedelta(days=732)

# Convert the show dates to plain datetime.date objects so they compare
# directly against the cutoff. This overwrites the column in setlist_by_song.
setlist_by_song['date'] = pd.to_datetime(setlist_by_song['date'], format='%m/%d/%y').dt.date

# Performances inside the window, reduced to the columns used downstream.
inside_window = setlist_by_song['date'] > one_year_ago
jojos_notebook_data = (
    setlist_by_song
    .loc[inside_window, ['song_name', 'show_index_overall', 'date', 'gap']]
    .reset_index(drop=True)
)

jojos_notebook_data.head()

Unnamed: 0,song_name,show_index_overall,date,gap
0,1 X 1,3178,2023-05-27,29.0
1,A Hard Rain'S A-Gonna Fall,3181,2023-06-24,21.0
2,A Hard Rain'S A-Gonna Fall,3202,2024-03-22,21.0
3,A Of D,3164,2023-03-03,15.0
4,Ace Of Spades,3173,2023-04-19,22.0


In [None]:
# Per-song summary of the performances in `jojos_notebook_data`:
# play count, most recent overall show index, and statistics of `gap`.
# Numeric columns are rounded to 2 decimals.
jojos_notebook = (
    jojos_notebook_data
    .groupby('song_name')
    .agg(
        times_played_in_last_year=('show_index_overall', 'count'),
        last_played=('show_index_overall', 'max'),
        min_gap=('gap', 'min'),
        max_gap=('gap', 'max'),
        avg_gap=('gap', 'mean'),
        med_gap=('gap', 'median'),
        std_gap=('gap', 'std'),
    )
    .reset_index()
    .rename(columns={'song_name': 'song'})
    .round(2)
)

# Shows elapsed between the song's last performance and the latest show index.
jojos_notebook['current_gap'] = last_show - jojos_notebook['last_played']

# Look up the date of the last performance in showdata, then keep only the
# reporting columns.
jojos_notebook = jojos_notebook.merge(
    showdata[['show_index_overall', 'date']].rename(columns={'date': 'ltp_date'}),
    how='left',
    left_on='last_played',
    right_on='show_index_overall',
)
jojos_notebook = jojos_notebook[
    ['song', 'times_played_in_last_year', 'ltp_date', 'current_gap', 'avg_gap', 'med_gap']
]

jojos_notebook.head()

Unnamed: 0,song,times_played_in_last_year,last_played,min_gap,max_gap,avg_gap,med_gap,std_gap,current_gap
0,1 X 1,1,3178,29.0,29.0,29.0,29.0,,37
1,A Hard Rain'S A-Gonna Fall,2,3202,21.0,21.0,21.0,21.0,0.0,13
2,A Of D,1,3164,15.0,15.0,15.0,15.0,,51
3,Ace Of Spades,1,3173,22.0,22.0,22.0,22.0,,42
4,Action Man,8,3215,4.0,10.0,6.62,5.5,2.39,0


In [55]:
# Keep songs whose current gap is more than 3 shows, most frequently played
# songs first.
jojos_notebook = (
    jojos_notebook
    .loc[lambda frame: frame['current_gap'] > 3]
    .sort_values(by='times_played_in_last_year', ascending=False)
    .reset_index(drop=True)
)

jojos_notebook.head(5)

Unnamed: 0,song,times_played_in_last_year,ltp_date,current_gap,avg_gap,med_gap
0,Rock,12,05/25/24,4,4.42,5.0
1,Henry Parsons Died,11,05/24/24,5,4.45,4.0
2,Disco,11,05/25/24,4,4.64,4.0
3,Wondering,10,04/25/24,6,4.8,5.0
4,Big Wooly Mammoth,10,05/25/24,4,5.2,5.0


In [None]:
# NOTE(review): this cell applies the same filter and sort as the cell before
# it (current gap above 3 shows, most frequently played first); it looks like a
# leftover duplicate.
more_than_three_shows = jojos_notebook['current_gap'] > 3
jojos_notebook = (
    jojos_notebook[more_than_three_shows]
    .sort_values(by='times_played_in_last_year', ascending=False)
    .reset_index(drop=True)
)

jojos_notebook.head(5)

Unnamed: 0,song_name,times_played_in_last_year,ltp_date,current_gap,avg_gap,med_gap
0,HENRY PARSONS DIED,14,2024-05-24,5,3.71,4.0
1,ROCK,14,2024-05-25,4,3.79,5.0
2,WONDERING,13,2024-04-25,6,4.15,5.0
3,DISCO,13,2024-05-25,4,3.92,4.0
4,DINER,12,2024-05-25,4,4.33,4.5


In [15]:
# Save the derived datasets to CSV.
# The project root is resolved here again so this cell does not depend on the
# path variables of an earlier cell: use the script's folder when `__file__`
# exists, otherwise (notebook kernel) the current working directory, and take
# its parent as the base directory.
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()
base_dir = os.path.dirname(script_dir)
save_path = os.path.join(base_dir, "Data", "Widespread_Panic")

# to_csv does not create missing folders, so make sure the target exists.
os.makedirs(save_path, exist_ok=True)

ck_plus.to_csv(os.path.join(save_path, "ckplus_wsp.csv"), index=False)
jojos_notebook.to_csv(os.path.join(save_path, "jojos_notebook.csv"), index=False)