In [183]:
import csv

# Load every row of the raw setlist TSV into memory as a list of cell lists.
# newline="" lets the csv module do its own line-ending handling, which it
# needs in order to parse quoted fields that contain line breaks correctly.
with open("data/setlist.tsv", newline="") as f:
    rd = csv.reader(f, delimiter="\t", quotechar='"')
    original_lines = list(rd)

In [184]:
from typing import List, Optional

class Set:
    """One set of a show: a label, an optional annotation, and its songs."""

    # Set label as read from the data (e.g. "I:", "II:", "E:").
    label: Optional[str] = None
    # Optional qualifier such as "electric" or "acoustic", stored without parentheses.
    annotation: Optional[str] = None
    # Song titles in running order; None until the set has been populated.
    songs: Optional[List[str]] = None

    def __len__(self) -> int:
        """Return the number of songs; an unpopulated set counts as empty."""
        return len(self.songs or [])

    def __repr__(self) -> str:
        """Return the label (plus parenthesised annotation, if any) followed by one song per line."""
        # Built in two steps so that no double quote is nested inside a
        # double-quoted f-string, which is a SyntaxError before Python 3.12.
        suffix = f"({self.annotation})" if self.annotation else ""
        header = f"{self.label} {suffix}"
        return header + "\n" + "\n".join(self.songs or [])


class Show:
    """A single show: header fields, the raw TSV lines it came from, and its sets."""

    date: Optional[str] = None
    venue1: Optional[str] = None
    venue2: Optional[str] = None
    city: Optional[str] = None
    state_or_country: Optional[str] = None
    notes: Optional[str] = None

    # Raw tab-joined source lines belonging to this show.
    lines: List[str]
    # Sets of the show, in order; each element must support len().
    sets: list

    def __init__(self) -> None:
        # Create the lists per instance. As class-level `[]` defaults they were
        # shared by every Show, so appending to one show mutated all of them.
        self.lines = []
        self.sets = []

    def __len__(self) -> int:
        """Return the total number of songs across all sets."""
        return sum(len(s) for s in self.sets)

    def __repr__(self) -> str:
        """Return 'date: venue1 [venue2] (city, state_or_country)'."""
        venue_str = self.venue1 + ((" " + self.venue2) if self.venue2 else "")
        return f"{self.date}: {venue_str} ({self.city}, {self.state_or_country})"

In [185]:
def shows_from_lines(data: list[list[str]]) -> list:
    """Group raw setlist rows into ``Show`` objects.

    Rows are classified by cell count:

    * empty rows are ignored;
    * rows with three or more cells start a new show; the cells are read as
      date, band, venue1, venue2, city, state/country, notes (missing trailing
      cells are treated as empty strings, extra cells are ignored);
    * two-cell rows are ``[set_label, song]`` rows for the current show. A
      non-empty label (or the first song row of a show) opens a new ``Set``;
      an empty label appends the song to the most recent set;
    * any other short row, and any song row seen before the first show
      header, is skipped.

    Args:
        data: Parsed TSV rows, each a list of cell strings.

    Returns:
        The shows that contain at least one song, in input order.
    """
    header_width = 7
    shows = []
    current_show = None

    for row in data:
        if len(row) == 0:
            continue

        if len(row) < 3:
            # Song rows need a show to attach to and exactly two cells;
            # anything else cannot be unpacked as (label, song) and is skipped.
            if current_show is None or len(row) != 2:
                continue

            possible_set_label, song = row
            if possible_set_label != '' or len(current_show.sets) == 0:
                s = Set()
                # The first set of a show may come without a label.
                s.label = possible_set_label if possible_set_label else 'I'
                if song.startswith(('(electric)', '(acoustic)')):
                    paren_idx = song.index(')')
                    s.annotation = song[1:paren_idx]
                    # Drop the annotation plus whatever whitespace follows it,
                    # rather than assuming exactly one separating space.
                    song = song[paren_idx + 1:].lstrip()
                s.songs = [song]

                current_show.sets.append(s)
            else:
                current_show.sets[-1].songs.append(song)

            current_show.lines.append("\t".join(row))
            continue

        # new show: keep the previous one only if it has any songs
        if current_show is not None and len(current_show) > 0:
            shows.append(current_show)

        # Normalise the header to exactly `header_width` cells so short or
        # over-long header rows do not break the unpacking below.
        cells = list(row[:header_width])
        cells += [""] * (header_width - len(cells))
        date_or_set, _band, venue1, venue2, city, state_or_country, notes = cells

        current_show = Show()
        current_show.date = date_or_set
        current_show.venue1 = venue1
        current_show.venue2 = venue2
        current_show.city = city
        current_show.state_or_country = state_or_country
        current_show.notes = notes
        current_show.sets = []

        current_show.lines = ["\t".join(row)]

    # Apply the same "has songs" filter to the final show as to all earlier
    # ones; previously the last show was kept even when it was empty.
    if current_show is not None and len(current_show) > 0:
        shows.append(current_show)

    return shows


In [186]:
def trim_setlists():
    """Write the raw lines of every parsed show to ``data/setlist-trimmed.tsv``.

    Shows are obtained by running ``shows_from_lines`` over the module-level
    ``original_lines``; each stored line is split back into cells on tabs and
    written with the same delimiter and quote character used for reading.
    """
    shows = shows_from_lines(original_lines)

    # newline="" stops the text layer from translating the "\r\n" that
    # csv.writer emits, which would otherwise become "\r\r\n" on Windows.
    with open("data/setlist-trimmed.tsv", "w", newline="") as f:
        wr = csv.writer(f, delimiter="\t", quotechar='"')
        for show in shows:
            wr.writerows(line.split("\t") for line in show.lines)

In [187]:
# Spot-check: print every set of the show(s) dated 1966/01/07, each followed
# by a "---" separator line.
shows = shows_from_lines(original_lines)
for s in shows:
    if s.date != "1966/01/07":
        continue
    for se in s.sets:
        print(se, "---", sep="\n")

I: 
Cold Rain & Snow
Beat It On Down The Line
Good Lovin'
Mama Tried
> Black Peter
Hard To Handle
St. Stephen
> Not Fade Away
Casey Jones
---
I: (electric)
China Cat Sunflower
> I Know You Rider
Me & My Uncle
Dire Wolf
Smokestack Lightnin'*
---
II: (acoustic)
Monkey & The Engineer
Little Sadie
Wake Up Little Susie*
> Black Peter*
Uncle John's Band
Katie Mae*
---
III: (electric)
Dark Star
> Cryptical Envelopment
> Drums
> The Other One
> Cryptical Envelopment
> Turn On Your Lovelight
---
E: 
And We Bid You Goodnight
---
