In [None]:
import csv
import re
import unicodedata
import json

input_file = "../official_list.json"
output_file = "official_list.csv"

# Leading columns written for every song, followed by one (name, id) column
# pair per artist slot: artist0, artist0_id, artist1, artist1_id, ...
# NOTE(review): an earlier comment here also listed an "artist" column
# (id,name,artist,game_rule,...). This cell has never written that column --
# confirm whether the CSV consumed further down is edited by hand afterwards.
base_columns = ["id", "name", "game_rule", "full_title", "artist_name_string"]
min_artist_slots = 5  # always emit at least artist0..artist4

with open(input_file, mode="r", encoding="utf-8") as infile:
    data = json.load(infile)

# Build every row before writing anything so the header can be sized to the
# widest row. Writing rows of differing widths under a fixed header produces a
# ragged CSV: csv.DictReader then yields None for the missing cells of short
# rows and stores overflow cells under the key None.
records = []
for song in data.values():
    try:
        new_row = [
            # Fall back to the title when the song has no Spotify track id.
            song.get("spotify_track_id", song["title"]),
            song["title"],
            "",  # game_rule: always written empty by this export
            song["full_title"],
            song["artist_name"],
        ]
        for artist in song["artists"]:
            new_row.append(artist["title"])
            # Fall back to the artist title when there is no Spotify artist id.
            new_row.append(artist.get("spotify_artist_id", artist["title"]))
    except Exception as e:
        # Best-effort export: report the malformed song and skip it.
        print(e, song)
    else:
        records.append((song, new_row))

# Each artist contributes exactly two cells after the base columns.
widest = max((len(row) for _, row in records), default=0)
artist_slots = max(min_artist_slots, (widest - len(base_columns)) // 2)
header = list(base_columns)
for i in range(artist_slots):
    header += [f"artist{i}", f"artist{i}_id"]

with open(output_file, mode="w", encoding="utf-8", newline="") as outfile:
    writer = csv.writer(outfile)

    # Write the header
    writer.writerow(header)

    for song, new_row in records:
        try:
            # Pad with empty cells so every row has exactly len(header) columns.
            writer.writerow(new_row + [""] * (len(header) - len(new_row)))
        except Exception as e:
            print(e, song)
    
        


In [21]:
import csv
import re
import unicodedata
import json
# Input and output file names for this cell.
# input_file is the per-song CSV whose artist0..artist4 / artist0_id..artist4_id
# columns are scanned below; output_file receives one row per unique artist.
# Both names are module-level and are re-assigned by the other cells.
input_file = "official_list.csv"
output_file = "artists.csv"

def clean_string(input_string):
    """Return a lowercase search key derived from ``input_string``.

    The text is NFD-decomposed and its combining marks (Unicode category
    ``Mn``) are dropped, which strips accents from letters. Every character
    outside ``a-z``, ``A-Z`` and ``0-9`` is then removed.

    If fewer than two characters survive that filtering, the untouched
    ``input_string`` is lowercased and returned instead, so that very short or
    fully non-ASCII inputs do not collapse to an empty or one-letter key.
    """
    decomposed = unicodedata.normalize('NFD', input_string)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != 'Mn']
    key = re.sub(r'[^a-zA-Z0-9]', '', ''.join(base_chars))

    # Too little left to be a useful key: fall back to the raw text.
    return key.lower() if len(key) >= 2 else input_string.lower()

def convert_to_json(input_file, output_file):
    """Convert a headered CSV file into a JSON array of row objects.

    Args:
        input_file: Path of the UTF-8 CSV file to read. Its first row is used
            as the column names (``csv.DictReader``).
        output_file: Path of the JSON file to write. It is overwritten.

    Each CSV row becomes one JSON object keyed by column name; the objects are
    written as a list with two-space indentation. A confirmation message is
    printed when the file has been written.
    """
    # newline="" is required by the csv module: without it the text layer
    # translates line endings before the parser sees them, so a "\r\n" inside
    # a quoted field is silently turned into "\n".
    with open(input_file, mode="r", encoding="utf-8", newline="") as infile:
        data = list(csv.DictReader(infile))

    # Write the JSON data to the output file
    with open(output_file, mode="w", encoding="utf-8") as outfile:
        json.dump(data, outfile, indent=2)

    print(f"CSV data has been converted to JSON and written to {output_file}.")


# Collect every distinct (artist_id, artist_name) pair found in the song CSV.
unique_artists = set()

# newline="" lets the csv module handle line endings itself, as its
# documentation requires.
with open(input_file, mode="r", encoding="utf-8", newline="") as infile:
    reader = csv.DictReader(infile)

    for row in reader:
        for i in range(5):  # Assuming artist0 to artist4 and corresponding IDs
            # DictReader fills the cells missing from a short row with None,
            # and .get() returns None for a column the file does not have;
            # both are treated as "no artist in this slot". Previously only
            # the name was guarded, so a None id raised AttributeError.
            artist_name = (row.get(f"artist{i}") or "").strip()
            artist_id = (row.get(f"artist{i}_id") or "").strip()

            if artist_name and artist_id:
                unique_artists.add((artist_id, artist_name))

# Write the unique artists to the output CSV file
with open(output_file, mode="w", encoding="utf-8", newline="") as outfile:
    writer = csv.writer(outfile)

    # Write the header
    writer.writerow(["id", "name", "searchable_name"])

    # Sorted by (id, name) so the output is stable between runs.
    for artist_id, artist_name in sorted(unique_artists):
        writer.writerow([artist_id, artist_name, clean_string(artist_name)])

# The CSV is closed at this point, so it can be re-read for the JSON copy.
convert_to_json(output_file, "artist.json")
print(f"Unique artists have been written to {output_file}.")


CSV data has been converted to JSON and written to artist.json.
Unique artists have been written to artists.csv.


In [4]:
import csv
import unicodedata
import re
# Input and output file names
input_file = "../official_list.csv"
output_file = "songs.csv"

# NOTE: clean_string() and convert_to_json() are not defined in this cell.
# They must already exist in the kernel (they are defined in the cell that
# builds artists.csv), so that cell has to be run first.

# Prepare the output rows
output_rows = []

# newline="" lets the csv module handle line endings itself, as its
# documentation requires.
with open(input_file, mode="r", encoding="utf-8", newline="") as infile:
    reader = csv.DictReader(infile)

    for row in reader:
        # Collect all non-empty artist IDs (artist0_id to artist4_id).
        # DictReader stores None for cells missing from a short row, so None
        # is treated like an empty cell instead of calling .strip() on it.
        artist_ids = []
        for i in range(5):
            artist_id = (row.get(f"artist{i}_id") or "").strip()
            if artist_id:
                artist_ids.append(artist_id)

        # Drop every column whose name starts with "artist" except the plain
        # "artist" column. The key None (used by DictReader for cells beyond
        # the header) is dropped too; it previously crashed key.startswith().
        condensed_row = {
            key: value
            for key, value in row.items()
            if key is not None and (not key.startswith("artist") or key == "artist")
        }
        # IDs are "|"-separated with a trailing "|" (a lone "|" when empty).
        condensed_row["artist_ids"] = "|".join(artist_ids) + "|"
        condensed_row["searchable_name"] = clean_string(row["name"])
        condensed_row["searchable_artist"] = clean_string(row["artist"])
        output_rows.append(condensed_row)

# Write the modified rows to the output CSV file
with open(output_file, mode="w", encoding="utf-8", newline="") as outfile:
    # Use the fieldnames from the modified row keys
    fieldnames = output_rows[0].keys() if output_rows else []
    writer = csv.DictWriter(outfile, fieldnames=fieldnames)

    # Write the header
    writer.writeheader()

    # Write the rows
    writer.writerows(output_rows)

print(f"Condensed artist IDs have been written to {output_file}.")
convert_to_json(output_file, "songs.json")


Condensed artist IDs have been written to songs.csv.
CSV data has been converted to JSON and written to songs.json.
