In [1]:
import os
import json
import re

In [57]:
def extract_release_date_and_name(album_folder_name):
    """Split an album folder name into its title and four-digit release year.

    The folder name is searched for a parenthesised four-digit number such
    as ``(1995)``. Because the leading group is greedy, the last such
    occurrence is used when several are present, and anything following it
    is discarded.

    Args:
        album_folder_name: Folder name, e.g. ``"Hello Nasty (1998)"``.

    Returns:
        A ``(album_name, year)`` tuple of strings. When a year is found the
        name is the text before it with surrounding whitespace stripped.
        When no year is found the folder name is returned unchanged together
        with the string ``'Unknown'``.
    """
    year_match = re.search(r'(.*)\((\d{4})\)', album_folder_name)

    # Guard clause: no "(YYYY)" marker anywhere in the folder name.
    if not year_match:
        return album_folder_name, 'Unknown'

    title_part, year = year_match.groups()
    return title_part.strip(), year
    
def artist_cleanup(artist):
    """Normalise an artist folder name into a lowercase lookup key.

    Underscores and hyphens become spaces, dots are removed, and the result
    is lower-cased (e.g. ``"beastie_boys"`` -> ``"beastie boys"``).
    """
    # One translation table: '_' and '-' map to a space, '.' is deleted.
    separators_to_spaces = str.maketrans('_-', '  ', '.')
    return artist.translate(separators_to_spaces).lower()

def update_or_add_album(artist_name, album_name, new_data, artist_albums_data):
    """Merge ``new_data`` into an artist's album entry, or append it as a new one.

    Args:
        artist_name: Key into ``artist_albums_data``.
        album_name: Value compared against each entry's ``'album_name'``.
        new_data: Dict of album fields. Merged into the first matching entry
            with ``dict.update``; appended unchanged when nothing matches.
        artist_albums_data: Mapping of artist name -> list of album dicts.
            Modified in place.

    Returns:
        None. The mapping is modified in place.
    """
    # An artist that is not in the mapping yet gets a fresh album list rather
    # than raising KeyError, so "add" also works for a first album.
    albums = artist_albums_data.setdefault(artist_name, [])

    existing_album = next(
        (album for album in albums if album['album_name'] == album_name),
        None,
    )

    # Compare against None explicitly so the decision does not depend on the
    # truthiness of the entry that was found.
    if existing_album is not None:
        existing_album.update(new_data)
    else:
        albums.append(new_data)


In [52]:
def create_artist_album_json(base_directory, regions=('east_coast', 'west_coast')):
    """Scan ``<base_directory>/<region>/<artist>/<album>`` folders into a dict.

    Args:
        base_directory: Directory containing the region folders.
        regions: Names of the region sub-folders to scan. Regions that do
            not exist as directories are skipped.

    Returns:
        A dict mapping the cleaned artist name (see ``artist_cleanup``) to a
        list of album dicts with the keys ``'album_name'``,
        ``'release_date'``, ``'path'`` and ``'region'``. An artist folder
        without album sub-folders maps to an empty list. Only directories
        are considered at every level; plain files are ignored.
    """
    artist_albums = {}

    for region in regions:
        region_path = os.path.join(base_directory, region)
        if not os.path.isdir(region_path):
            continue

        # os.listdir returns entries in arbitrary order; sorting keeps the
        # generated JSON stable from run to run.
        for artist_folder in sorted(os.listdir(region_path)):
            artist_path = os.path.join(region_path, artist_folder)
            if not os.path.isdir(artist_path):
                continue

            # setdefault instead of assigning a new list: if the same cleaned
            # artist name shows up again (another region, or a second folder
            # that normalises to the same key) the albums collected so far
            # are kept rather than overwritten.
            albums = artist_albums.setdefault(artist_cleanup(artist_folder), [])

            for album_folder_name in sorted(os.listdir(artist_path)):
                album_path = os.path.join(artist_path, album_folder_name)
                if not os.path.isdir(album_path):
                    continue

                album_name, release_date = extract_release_date_and_name(album_folder_name)
                albums.append({
                    'album_name': album_name,
                    'release_date': release_date,
                    'path': album_path,
                    'region': region
                })

    return artist_albums

# Root folder holding the 'east_coast' and 'west_coast' lyric directories;
# the generated JSON file is written next to them.
coast_directories = '/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Lyrics'
json_file_path = os.path.join(coast_directories, 'artist_albums.json')

# Collect the artist -> albums mapping and serialise it with 4-space indentation
albums_json = create_artist_album_json(coast_directories)
json_output = json.dumps(albums_json, indent=4)
print(json_output)

# Persist the same text that was printed above
with open(json_file_path, mode='w') as json_file:
    json_file.write(json_output)

{
    "big l": [
        {
            "album_name": "Lifestylez Ov Da Poor and Dangerous",
            "release_date": "1995",
            "path": "/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Lyrics/east_coast/big_l/Lifestylez Ov Da Poor and Dangerous (1995)",
            "region": "east_coast"
        }
    ],
    "beastie boys": [
        {
            "album_name": "Check Your Head",
            "release_date": "1992",
            "path": "/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Lyrics/east_coast/beastie_boys/Check Your Head (1992)",
            "region": "east_coast"
        },
        {
            "album_name": "Hello Nasty",
            "release_date": "1998",
            "path": "/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Lyrics/east_coast/beastie_boys/Hello Nasty (1998)",
            "region": "east_coast"
        },
        {
            "album_name": "Ill Communication",
            "release_date": "1994",
            "path": "/Users/borosabel/Documen

In [126]:
def merge_with_audio(base_directory,
                     json_file_path='/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Lyrics/artist_albums.json',
                     regions=('east_coast', 'west_coast')):
    """Attach audio folder paths to the album entries stored in a JSON file.

    The JSON file is loaded as a mapping of artist name -> list of album
    dicts. For each ``<base_directory>/<region>/<artist>`` directory, every
    album entry of that artist is compared with the entries inside the
    artist directory after both names have passed through
    ``album_name_cleanup``. On a match the album dict receives an
    ``'audio_path'`` key pointing at the matching entry.

    ``artist_cleanup`` and ``album_name_cleanup`` must already be defined in
    the kernel when this function is called.

    Args:
        base_directory: Directory containing the region folders of the audio
            collection.
        json_file_path: Path of the artist/album JSON file to load.
        regions: Names of the region sub-folders to scan. Regions that do
            not exist as directories are skipped.

    Returns:
        The loaded mapping, with ``'audio_path'`` added to every album entry
        for which a matching entry was found.

    Raises:
        KeyError: If an album entry in the JSON has no ``'album_name'`` key.
    """
    with open(json_file_path, 'r', encoding='utf-8') as json_file:
        artist_albums_data = json.load(json_file)

    for region in regions:
        region_path = os.path.join(base_directory, region)
        if not os.path.isdir(region_path):
            continue

        # Sorted so that the report order and the tie-breaking below do not
        # depend on the arbitrary order returned by os.listdir.
        for artist_folder in sorted(os.listdir(region_path)):
            artist_path = os.path.join(region_path, artist_folder)
            if not os.path.isdir(artist_path):
                continue

            artist_name = artist_cleanup(artist_folder)

            # Only a missing artist is reported. The check is explicit so
            # that a KeyError raised by a malformed album entry is not
            # mistaken for a missing artist.
            if artist_name not in artist_albums_data:
                print("Nem jó", artist_name)
                continue

            # List the artist directory once and index it by cleaned name,
            # instead of re-listing it for every album. When two entries
            # clean to the same name, the later one wins.
            audio_folders = {}
            for album_folder_name in sorted(os.listdir(artist_path)):
                audio_folders[album_name_cleanup(album_folder_name)] = album_folder_name

            for album in artist_albums_data[artist_name]:
                matched_folder = audio_folders.get(album_name_cleanup(album['album_name']))
                if matched_folder is not None:
                    album['audio_path'] = os.path.join(artist_path, matched_folder)

    return artist_albums_data

In [127]:
# Destination of the merged metadata: test.json inside the audio directory
json_file_path = os.path.join('/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio', 'test.json')

# Enrich the album metadata with the audio folder locations
a = merge_with_audio('/Users/borosabel/Documents/Uni/Thesis/PopMIR/Data/Audio')

# Serialise with 4-space indentation and write the text to disk
json_output = json.dumps(a, indent=4)
with open(json_file_path, mode='w') as json_file:
    json_file.write(json_output)

Nem jó slick rick


In [115]:
def album_name_cleanup(album_name):
    """Apply a fixed sequence of text substitutions to an album name.

    The substitutions are plain, case-sensitive substring replacements
    applied one after another, so later rules see the output of earlier
    ones (e.g. ``'Is'`` -> ``'is'`` runs before the
    ``'Life is Too Short'`` rule). The order of the table must therefore be
    preserved.

    Args:
        album_name: Album title or album folder name.

    Returns:
        The string after all substitutions have been applied.
    """
    substitutions = (
        ('&', 'and'),
        ('…', '...'),
        (',', ''),
        ('Is', 'is'),
        ('Vol. 1', 'Vol.1'),
        (' A ', ' a '),
        (' To ', ' to '),
        (' And ', ' and '),
        (' em', " 'Em"),
        ('Life is Too Short', 'Life Is... Too Short'),
        ('Rhyme Pays', 'Rhymes Pays'),
        ('Strictly 4 My N.I.G.G.A.Z.', 'Strictyl 4 My Niggaz'),
        ('Vol. 2', 'Vol.2'),
        (' With ', ' with '),
        ('!', ''),
        ('Mr Smith', 'Mr.Smith'),
    )

    cleaned = album_name
    for old, new in substitutions:
        cleaned = cleaned.replace(old, new)
    return cleaned