In [None]:
import os
import json

def find_json_files(directory):
    """Recursively collect the paths of all ``.json`` files under ``directory``.

    Args:
        directory: Root folder to walk (sub-folders are included).

    Returns:
        A list of file paths (root joined with file name) for every file whose
        name ends with ``.json``, in the order ``os.walk`` yields them. The
        list is empty when nothing matches.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.json')
    ]

def extract_track_ids(json_files):
    """Collect the track IDs found in a list of album JSON files.

    Each file is expected to hold a JSON object with an ``albums`` list, where
    every album may carry ``tracks -> items`` and every item may carry ``id``.

    Args:
        json_files: Iterable of paths to JSON files.

    Returns:
        A list of track IDs in file/album/track order. Duplicates are kept;
        entries without a usable ID are skipped.
    """
    track_ids = []
    for json_file in json_files:
        # JSON is UTF-8 by specification; do not rely on the platform default
        # encoding (cp1252 on Windows), which fails on non-ASCII names.
        with open(json_file, 'r', encoding='utf-8') as file:
            data = json.load(file)
        # `or` fallbacks cover keys that are present but null, not just absent.
        for album in data.get('albums') or []:
            if not album:
                # Null placeholder in the albums list; nothing to read.
                continue
            tracks = (album.get('tracks') or {}).get('items') or []
            for track in tracks:
                track_id = track.get('id') if track else None
                if track_id:
                    track_ids.append(track_id)
    return track_ids

# Specify the folder path here
folder_path = 'C:\\Users\\Music\\Desktop\\PROJECTS\\Spotify Project\\SCRAPED_DATA\\ALBUMS'

# Find all JSON files within the folder and its subfolders
json_files = find_json_files(folder_path)

# Extract all track IDs from these JSON files
track_ids = extract_track_ids(json_files)

# De-duplicate, drop missing IDs (they would otherwise be written as the
# literal text "None"), and sort so the output file is identical between runs
# (plain set iteration order is not stable for strings across interpreter runs).
unique_track_ids = sorted(
    {track_id for track_id in track_ids if track_id},
    key=str,
)

# Save unique track IDs to a file, one ID per line
with open('unique_track_ids.txt', 'w', encoding='utf-8') as f:
    f.writelines(f"{track_id}\n" for track_id in unique_track_ids)

print("Unique track IDs have been saved to unique_track_ids.txt.")


In [None]:
import requests
import json
import os
import time

def load_track_ids(file_path):
    """Read track IDs from a text file containing one ID per line.

    Surrounding whitespace is stripped and blank lines are skipped, so a
    trailing newline or stray empty line does not turn into an empty ID.
    A leading UTF-8 byte-order mark, if present, is ignored.

    Args:
        file_path: Path to the text file.

    Returns:
        A list of non-empty ID strings in file order, or an empty list when
        the file does not exist (a message is printed in that case).
    """
    try:
        # 'utf-8-sig' reads plain UTF-8 as well as files saved with a BOM.
        with open(file_path, 'r', encoding='utf-8-sig') as file:
            stripped_lines = (line.strip() for line in file.read().splitlines())
            return [track_id for track_id in stripped_lines if track_id]
    except FileNotFoundError:
        print("File not found.")
        return []

def get_tracks_info(track_ids, access_token, market='US'):
    """Fetch metadata for a batch of tracks from the Spotify Web API.

    Sends a single GET request to ``/v1/tracks`` with the IDs joined by commas.

    Args:
        track_ids: List of track ID strings for one request.
        access_token: Bearer token sent in the ``Authorization`` header.
        market: Value passed as the ``market`` query parameter.

    Returns:
        The decoded JSON response on HTTP 200. ``None`` when ``track_ids`` is
        empty, when the request fails at the network level, or when the
        server answers with any other status code (a message is printed for
        the latter two cases).
    """
    if not track_ids:
        # Nothing to ask for; avoid sending a request with an empty `ids` value.
        return None

    url = 'https://api.spotify.com/v1/tracks'
    headers = {'Authorization': f'Bearer {access_token}'}
    params = {'ids': ','.join(track_ids), 'market': market}
    try:
        # Without a timeout, requests can block indefinitely on a stalled connection.
        response = requests.get(url, headers=headers, params=params, timeout=30)
    except requests.RequestException as exc:
        # Treat network failures like any other failed fetch so the caller can
        # continue with the next batch.
        print(f"Failed to retrieve tracks information. Request error: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Failed to retrieve tracks information. Status code: {response.status_code}, Response: {response.text}")
    return None

# Read the Spotify access token from the environment instead of hard-coding it:
# a token committed in a notebook is leaked to everyone who can read the file.
# Set it before starting Jupyter, e.g. `set SPOTIFY_ACCESS_TOKEN=...` on Windows.
access_token = os.environ.get('SPOTIFY_ACCESS_TOKEN')
if not access_token:
    raise RuntimeError(
        "Set the SPOTIFY_ACCESS_TOKEN environment variable to a valid Spotify access token."
    )
# The path to your file containing track IDs
track_ids_file_path = 'TRACK_IDs_TO_SCRAPE.txt'
# The directory where the fetched track information will be saved
output_directory = 'track_information'
os.makedirs(output_directory, exist_ok=True)

# Load the track IDs
track_ids = load_track_ids(track_ids_file_path)

# Process the list in chunks of 50 IDs at a time (50 is the maximum number of
# IDs the "Get Several Tracks" endpoint accepts per request)
for i in range(0, len(track_ids), 50):
    chunk = track_ids[i:i+50]
    tracks_info = get_tracks_info(chunk, access_token)
    if tracks_info:
        # The timestamp keeps file names unique across repeated runs
        timestamp = int(time.time())
        with open(f'{output_directory}/tracks_info_{i}-{i+len(chunk)}_{timestamp}.json', 'w', encoding='utf-8') as output_file:
            json.dump(tracks_info, output_file)

print("Tracks information has been saved.")


In [None]:
import pandas as pd

# Load the CSV files.
# The contributors file is read with the default comma separator; the other
# three are read with ';' as the separator.
contributors_df = pd.read_csv('C:\\Users\\Music\\Desktop\\PROJECTS\\Spotify Project\\SCRAPED_DATA\\WORKING\\contributors_normalized_onlyMatchedArtists.csv')
# Path separators are written as '\\' throughout; a lone '\P' is an invalid
# escape sequence that newer Python versions warn about.
artists_df = pd.read_csv('C:\\Users\\Music\\Desktop\\PROJECTS\\Spotify Project\\SCRAPED_DATA\\FINISHED\\NEWEST\\PRODFILES\\artists_table.csv', sep=';')
artist_album_mapping_df = pd.read_csv('C:\\Users\\Music\\Desktop\\PROJECTS\\Spotify Project\\SCRAPED_DATA\\FINISHED\\NEWEST\\PRODFILES\\artist_album_mapping.csv', sep=';')
track_data_df = pd.read_csv('C:\\Users\\Music\\Desktop\\PROJECTS\\Spotify Project\\SCRAPED_DATA\\FINISHED\\NEWEST\\tracks_data_full.csv', sep=';')

# Step 1: Merge artists with their albums.
# Inner join on artists_df['id'] == artist_album_mapping_df['artistID'], so
# artists without a mapping row (and mapping rows without an artist) are dropped.
artists_with_albums = pd.merge(artists_df, artist_album_mapping_df, left_on='id', right_on='artistID', how='inner')

In [None]:
# Step 2: attach track rows to every artist/album row by matching
# artists_with_albums['albumID'] against track_data_df['album_id'].
# Inner join: rows without a match on either side are dropped.
artist_tracks = artists_with_albums.merge(
    track_data_df,
    how='inner',
    left_on='albumID',
    right_on='album_id',
)

In [None]:
# Build a lookup from each 'artist_mbid' value to the set of
# 'recording_mbid' values that appear alongside it in contributors_df.
contributors_dict = (
    contributors_df
    .groupby('artist_mbid')['recording_mbid']
    .apply(set)
    .to_dict()
)
