In [None]:
import json
import os

import requests

# Function to split the list of IDs into chunks
def split_list(lst, n=100):
    """Generate consecutive slices of ``lst``, each holding at most ``n`` items.

    The final slice is shorter when ``len(lst)`` is not a multiple of ``n``.
    """
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

# Function to make the Spotify API call
def fetch_artists_data(ids, token):
    """Fetch artist objects for a batch of Spotify IDs.

    Args:
        ids: Iterable of Spotify artist ID strings. They are comma-joined
            into the ``ids`` query parameter of a single request.
        token: Access token sent as ``Authorization: Bearer <token>``.

    Returns:
        The decoded JSON body of the response. If the body is not valid
        JSON, a dict shaped like ``{'error': {'status': ..., 'message': ...}}``
        is returned instead, so callers that check for an ``'error'`` key
        handle that case the same way as an API-reported error.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    headers = {
        'Authorization': f'Bearer {token}',
    }
    # Join the IDs with a comma for the API request
    ids_param = ','.join(ids)
    print(ids_param)
    response = requests.get(
        f'https://api.spotify.com/v1/artists?ids={ids_param}',
        headers=headers,
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would hang the whole scrape.
        timeout=30,
    )
    try:
        return response.json()
    except ValueError:
        # requests raises a ValueError subclass when the body is not JSON
        # (for example an HTML error page from a proxy or gateway).
        return {
            'error': {
                'status': response.status_code,
                'message': f'Non-JSON response (HTTP {response.status_code})',
            }
        }


# Load the Spotify artist IDs from a file (one ID per line)
with open('C:\\Users\\Music\\Desktop\\PROJECTS\\Spotify Project\\96kArtistIDList.txt', 'r') as file:
    # Strip surrounding whitespace and drop blank lines so that no empty ID
    # ends up in the comma-joined request parameter.
    ids = [line.strip() for line in file if line.strip()]

# Read the access token from the environment instead of hard-coding it:
# a token committed to source is exposed to everyone with access to the file.
token = os.environ.get('SPOTIFY_ACCESS_TOKEN')
if not token:
    raise RuntimeError(
        'Set the SPOTIFY_ACCESS_TOKEN environment variable to a valid '
        'Spotify access token before running this cell.'
    )

# Split the IDs into chunks of 50 (one API request per chunk)
id_chunks = list(split_list(ids, 50))

for chunk_number, chunk in enumerate(id_chunks, start=1):
    response_json = fetch_artists_data(chunk, token)

    # Check if the response contains an error
    if 'error' in response_json:
        error = response_json['error']
        # The error may be an object with a 'message' field or a plain
        # string; report whichever is available instead of crashing.
        message = error.get('message', error) if isinstance(error, dict) else error
        print(f"Error fetching data for chunk {chunk_number}: {message}")
    else:
        # Save each chunk response into a separate JSON file
        with open(f'artists_data_chunk_{chunk_number}.json', 'w', encoding='utf-8') as outfile:
            json.dump(response_json, outfile)

        print(f'Successfully fetched and saved data for chunk {chunk_number}')


In [None]:
import json
import csv
import os
from datetime import datetime

# Folder holding the JSON files that will be converted
json_files_directory = 'C:\\Users\\Music\\team_project\\team37\\prepopulationStuff\\PythonNotebooksForPrepopulation\\artist_objects_full_scrape_1'

# Destination of the generated CSV file
csv_file_path = 'artists_data.csv'

# Today's date as YYYY-MM-DD, used for the 'date_added_to_db' and
# 'date_last_modified' columns
current_date = f"{datetime.now():%Y-%m-%d}"

# CSV header, in output order, including one image column per size
csv_columns = [
    'id',
    'artist_spotify_id',
    'artist_name',
    'artist_popularity',
    'artist_image_small',
    'artist_image_medium',
    'artist_image_large',
    'artist_followers',
    'date_added_to_db',
    'date_last_modified',
]

# Artists collected so far, keyed by Spotify ID so each one is kept only once
unique_artists = {}

# Function to read each JSON file and extract artist information, including different image sizes
def process_json_file(file_path):
    """Read one JSON file and add its artists to ``unique_artists``.

    The file is expected to hold an object with an ``artists`` list. Each
    artist whose ``id`` is not yet a key of the module-level
    ``unique_artists`` dict is stored there as a flat row dict; artists
    already present are left untouched (first occurrence wins).

    Up to three image URLs are kept, ordered by image height: the tallest
    goes to ``artist_image_large``, the next to ``artist_image_medium`` and
    the third to ``artist_image_small``. Missing slots are empty strings.

    Args:
        file_path: Path of the JSON file to read.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for artist in data.get('artists', []):
        # The list may contain null placeholders (e.g. for IDs the API could
        # not resolve); indexing them would raise TypeError.
        if not artist:
            continue

        artist_id = artist['id']
        # Check if the artist ID already exists to avoid duplicates
        if artist_id in unique_artists:
            continue

        # Tallest image first. A missing or null height sorts as 0 rather
        # than raising, and sorted() leaves the loaded data unmodified.
        images = sorted(
            artist.get('images') or [],
            key=lambda image: image.get('height') or 0,
            reverse=True,
        )
        urls = [image['url'] for image in images[:3]]
        urls += [''] * (3 - len(urls))  # pad so unpacking always gets 3 values
        image_large, image_medium, image_small = urls

        followers = artist.get('followers') or {}

        unique_artists[artist_id] = {
            'artist_spotify_id': artist_id,
            'artist_name': artist.get('name', ''),
            'artist_popularity': artist.get('popularity', ''),
            'artist_image_small': image_small,
            'artist_image_medium': image_medium,
            'artist_image_large': image_large,
            'artist_followers': followers.get('total', ''),
            'date_added_to_db': current_date,
            'date_last_modified': current_date,
        }

# Process every JSON file in the directory. os.listdir() returns entries in
# arbitrary order, so the names are sorted to make the result reproducible:
# the processing order decides both which duplicate of an artist is kept and
# which row ID each artist receives.
for filename in sorted(os.listdir(json_files_directory)):
    if filename.endswith('.json'):
        process_json_file(os.path.join(json_files_directory, filename))

# Write the unique artists to a CSV file with semicolons as delimiters
with open(csv_file_path, mode='w', newline='', encoding='utf-8') as csv_file:
    writer = csv.DictWriter(csv_file, fieldnames=csv_columns, delimiter=';')
    writer.writeheader()

    # Row IDs are 1-based and follow the order in which artists were collected
    for row_id, artist_fields in enumerate(unique_artists.values(), start=1):
        writer.writerow({'id': row_id, **artist_fields})

print(f"CSV file has been successfully created at {csv_file_path} with {len(unique_artists)} unique artist entries.")
