In [8]:
import json
from dataclasses import dataclass, field

In [9]:
@dataclass(frozen=True, unsafe_hash=True)
class Album:
    name: str
    release_date: str
    group: str
    type: str
    id: str
    tracks: list["Track"] = field(default_factory=list, init=False, hash=False, repr=False)

    @staticmethod
    def fromdict(d):
        album = Album(
            name=d["name"],
            release_date=d["release_date"],
            group=d["album_group"],
            type=d["album_type"],
            id=d["id"],
        )
        album.tracks.extend(Track.fromdict(t, album) for t in d["tracks"])
        return album


@dataclass(frozen=True, unsafe_hash=True)
class Track:
    name: str
    album: Album = field(hash=False)
    isrc: str
    available_markets: list[str] = field(hash=False, repr=False)
    id: str
    uri: str
    stolen: set["Track"] = field(default_factory=set, init=False, hash=False)
    tvs: set["Track"] = field(default_factory=set, init=False, hash=False)
    sorted_tvs: list["Track"] = field(default_factory=list, init=False, hash=False)

    @staticmethod
    def fromdict(d, album):
        return Track(
            name=d["name"],
            album=album,
            isrc=d["external_ids"]["isrc"],
            available_markets=d["available_markets"],
            id=d["id"],
            uri=d["uri"],
        )

In [10]:
with open("run/albums.json", "r") as f:
    albums = json.load(f)
albums: list[Album] = [Album.fromdict(d) for d in albums]
albums.sort(key=lambda a: a.release_date, reverse=True)

In [11]:
with open("run/album_names.txt", "w") as f:
    for album in albums:
        f.write(f"{album.name} ({album.release_date})\n")

In [18]:
def normalize_name(name: str):
    return name \
        .replace("\u2019", "'") \
        .replace("SuperStar", "Superstar") \
        .replace("I Knew You Were Trouble.", "I Knew You Were Trouble")


def remove_parens(name: str):
    return name.split("(", 1)[0].strip()


tracks = [track for album in albums for track in album.tracks]

for track in tracks:
    track.tvs.clear()


for track in tracks:
    name = normalize_name(track.name)
    if (
        "(Taylor's Version)" in name and "(From The Vault)" not in name
    ) or (
        "All Too Well (10 Minute Version)" in name
    ) or (
        "All Too Well (Sad Girl Autumn Version)" in name
    ):
        song_name = remove_parens(name)

        for stolen in tracks:
            stolen_name = normalize_name(stolen.name)
            if stolen_name.startswith(song_name) and " Version)" not in stolen_name:  # Version) also catches ATW versions
                track.stolen.add(stolen)
                stolen.tvs.add(track)
        if not track.stolen:
            print(f"No stolen found: {track.name} (normalized: {song_name})")


for stolen in tracks:
    song_name = normalize_name(stolen.name)
    # filter by Acoustic/non-Acoustic
    filtered_tvs = [
        r for r in stolen.tvs
        if ("Acoustic" in stolen.name) == ("Acoustic" in r.name)
    ]
    if stolen.tvs and not filtered_tvs:
        print("Not using Acoustic filter, no TVs left:", stolen)
        filtered_tvs = stolen.tvs
    stolen.sorted_tvs.clear()
    stolen.sorted_tvs.extend(sorted(
        filtered_tvs,
        key=lambda tv: (
            # special ATW handling
            # SGAV last
            "(Sad Girl Autumn Version)" not in tv.name,
            # TMVTSF second to last
            "(10 Minute Version) (The Short Film)" not in tv.name,


            # exact title matches first
            song_name == remove_parens(normalize_name(tv.name)),
            # prefer non-remix (e.g. "Love Story (Taylor's Version)" over "Love Story (Taylor's Version) [Elvira Remix]")
            "Remix" not in tv.name,
            # prefer Acoustic match
            ("Acoustic" in stolen.name) == ("Acoustic" in tv.name),
            # Piano Version match (e.g. for "Forever & Always - Piano Version", prefer "Forever & Always (Piano Version) (Taylor’s Version)")
            ("Piano" in stolen.name) == ("Piano" in tv.name),
            # albums over singles
            tv.album.type == "album",
            # singles over compilations
            tv.album.type == "single",
            # pre-release singles over "…(Taylor's Version) Chapter"
            "Chapter" not in tv.album.name,
            # for singles, prefer single release (e.g. album "Wildest Dreams TV" and not "This Love TV")
            tv.name == tv.album.name,
        ),
        reverse=True
    ))


tvs = {}
EXTRA_INFO = False
for stolen in tracks:
    if stolen.sorted_tvs:
        if stolen.isrc in tvs:
            continue
        tvs[stolen.isrc] = {}
        if EXTRA_INFO:
            tvs[stolen.isrc] = {
                "__name ": stolen.name,
                "__id": stolen.id,
                "__album": stolen.album.name,
                "__album_type": stolen.album.type,
                #"__avail_markets": stolen.available_markets
            }
        if "- live" in stolen.name.lower():
            tvs[stolen.isrc]["isLive"] = True
        if "remix" in stolen.name.lower():
            tvs[stolen.isrc]["isRemix"] = True
        if "acoustic" in stolen.name.lower() and "acoustic" not in stolen.sorted_tvs[0].name.lower():
            tvs[stolen.isrc]["isAcousticWithoutTV"] = True
        if "demo" in stolen.name.lower():
            tvs[stolen.isrc]["isDemo"] = True
        if " mix" in stolen.name.lower():
            tvs[stolen.isrc]["isMix"] = True
        tvs[stolen.isrc]["ids"] = [
            track.id
            if not EXTRA_INFO else
            {
                "__name ": track.name,
                "__isrc": track.isrc,
                "__album": track.album.name,
                "__album_type": track.album.type,
                #"__avail_markets": track.available_markets,
                "id": track.id,
            }
            for track in stolen.sorted_tvs
        ]
with open("../src/app/taylorsversions.json", "w") as f:
    json.dump(tvs, f, indent=2)


with open("run/find_log.txt", "w") as f:
    for album in albums:
        print("###", album.name, end="\n\n", file=f)
        for track in album.tracks:
            print(track.name, file=f)
            if track.isrc in tvs:
                for tag in ("isLive", "isRemix", "isAcousticWithoutTV", "isDemo", "isMix"):
                    if tag in tvs[track.isrc]:
                        print("  ", tag, file=f)
            for tv in track.sorted_tvs:
                print(f"  -> {tv.name} • {tv.album.name}", file=f)
            if (
                not track.sorted_tvs and
                "(Taylor's Version)" not in album.name and
                album.release_date < "2019-08-20"
            ):
                print("  NO TVs", file=f)
            print(file=f)
        print("\n\n", file=f)

No stolen found: Message In A Bottle (Fat Max G Remix) (Taylor’s Version) (normalized: Message In A Bottle)
No stolen found: Eyes Open (Taylor's Version) (normalized: Eyes Open)
No stolen found: Safe & Sound (feat. Joy Williams and John Paul White) (Taylor’s Version) (normalized: Safe & Sound)
No stolen found: Safe & Sound (feat. Joy Williams and John Paul White) (Taylor’s Version) (normalized: Safe & Sound)
No stolen found: Eyes Open (Taylor's Version) (normalized: Eyes Open)
Not using Acoustic filter, no TVs left: Track(name='Back To December - Acoustic', album=Album(name='Speak Now (Deluxe Edition)', release_date='2010-10-25', group='album', type='album', id='5EpMjweRD573ASl7uNiHym'), isrc='USCJY1003672', id='6IH2Z54gYPLqgJtxZjX80i', uri='spotify:track:6IH2Z54gYPLqgJtxZjX80i', stolen=set(), tvs={Track(name="Back To December (Taylor's Version)", album=Album(name="Speak Now (Taylor's Version)", release_date='2023-07-07', group='album', type='album', id='5AEDGbliTTfjOB8TSm1sxt'), isrc=