In [None]:
"""
Note: Gemini was used as autocomplete for this notebook
"""

"""
This notebook will help you download the album cover images, and assign genre labels to them
"""

# Download datasets
# !gdown https://drive.google.com/uc?id=1myz_mN01Uv8WGUk_Wtag5fiZw8v-WVos
# !gdown https://drive.google.com/uc?id=1qsStd4onpGFLLMPEdjB4DDb8negqNLdz

In [None]:
import numpy as np
import pandas as pd
from time import sleep
# from google.colab import drive
# drive.mount('/content/drive')

# albums.csv is tab-separated; preview the first rows to sanity-check the columns.
albums = pd.read_csv("albums.csv", delimiter="\t")
albums.head()

In [None]:
# artists.csv is tab-separated; preview the first rows to sanity-check the columns.
artists = pd.read_csv("artists.csv", delimiter="\t")
artists.head()

In [None]:
# For each album, do two things:
# 1. Download the cover image from the URL listed in albums.csv
# 2. For each artist credited on the album, look up their ID in artists.csv
#    and label the album with that artist's genres

In [None]:
import requests
from io import BytesIO
from PIL import Image

def save_image_from_url(image_url, file_path, timeout=30):
  """
  Downloads an image from a URL and saves it to a local file.

  Args:
    image_url: The URL of the image to download.
    file_path: The local file path to save the image to. Pillow picks the
      output format from this path's extension.
    timeout: Seconds to wait for the server before giving up. Without a
      timeout, requests can block indefinitely on an unresponsive host.

  Raises:
    requests.exceptions.RequestException: If the request fails, times out, or
      the server answers with a 4xx/5xx status.
    OSError: If the payload is not a readable image or the file cannot be
      written.
  """
  # Modes the JPEG writer accepts as-is; anything else (RGBA, P, LA, ...) must
  # be converted first or Image.save raises "cannot write mode ... as JPEG".
  jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

  # No stream=True: .content reads the whole body anyway, so streaming bought
  # nothing and only delayed releasing the connection.
  response = requests.get(image_url, timeout=timeout)
  response.raise_for_status()  # Raise an exception for bad responses (4xx or 5xx)

  with Image.open(BytesIO(response.content)) as image:
    saving_as_jpeg = str(file_path).lower().endswith((".jpg", ".jpeg"))
    if saving_as_jpeg and image.mode not in jpeg_modes:
      # e.g. a PNG cover with transparency being written to a .jpg path.
      image = image.convert("RGB")
    image.save(file_path)

In [None]:
import os

# exist_ok=True keeps this cell re-runnable: a plain `mkdir` errors out when
# the directory is already there from a previous run.
os.makedirs("album_covers", exist_ok=True)

In [None]:
import time
import ast
import shutil
import os
import traceback  # Import for traceback information

# Build album_genres.csv: for every album, collect the genre lists of its
# artists, download the cover image, and record one labelled row per album.
ALBUM_COVERS_DIR = "album_covers"
DRIVE_OUTPUT_DIR = "/content/drive/MyDrive/Colab Notebooks/qac239"
REQUEST_DELAY_SECONDS = 0.25  # pause between downloads to avoid overwhelming the server

# Make sure the image directory exists even if the mkdir cell was not run.
os.makedirs(ALBUM_COVERS_DIR, exist_ok=True)

# One-time artist_id -> raw genres string lookup, instead of scanning the whole
# artists frame for every artist of every album. keep='first' reproduces the
# "take the first matching row" behaviour of the previous per-artist filter.
genres_by_artist = (
    artists.drop_duplicates(subset='artist_id', keep='first')
    .set_index('artist_id')['genres']
    .to_dict()
)

records = []
for index, row in albums.iterrows():
    try:
        artist_ids = ast.literal_eval(row.artists)
        # An artist missing from artists.csv raises KeyError here, which is
        # reported below and skips the whole album.
        genres = [
            ast.literal_eval(genres_by_artist[artist_id])
            for artist_id in artist_ids
        ]
        # NOTE(review): this only skips albums with no artists; an album whose
        # artists all have empty genre lists is still kept -- confirm intent.
        if len(genres) == 0:
            continue
        # Download the image of the album associated with these genres
        save_image_from_url(row.image_url, os.path.join(ALBUM_COVERS_DIR, f"{index}.jpg"))

        # album_index is the albums.csv row index, i.e. the image file name stem.
        # Using a running count here would drift away from the file names as
        # soon as one album is skipped or fails to download.
        records.append({'album_index': index, 'album_id': row.album_id, 'genres': genres})
        time.sleep(REQUEST_DELAY_SECONDS)
    except Exception as e:
        # Best-effort crawl: report the failure and move on to the next album.
        print(f"Error processing album at index {index}: {e}")
        traceback.print_exc()  # Print traceback for debugging

# Build the frame once from the collected rows rather than growing it per row.
output_df = pd.DataFrame(records, columns=['album_index', 'album_id', 'genres'])

# Save results regardless of errors
output_df.to_csv('album_genres.csv', index=True)

# Only mirror to Google Drive when it is actually mounted; otherwise the copy
# would raise after the whole download loop has already finished.
if os.path.isdir(DRIVE_OUTPUT_DIR):
    shutil.copy('album_genres.csv', os.path.join(DRIVE_OUTPUT_DIR, 'album_genres.csv'))
else:
    print(f"Skipping Drive copy: {DRIVE_OUTPUT_DIR} not found (is Google Drive mounted?)")

# # Check if the destination directory exists before attempting to copy
# if not os.path.exists('/content/drive/MyDrive/Colab Notebooks/qac239/album_covers'):
#     shutil.copytree('album_covers', '/content/drive/MyDrive/Colab Notebooks/qac239/album_covers')
# else:
#     print("Destination directory already exists. Overwriting...")
#     shutil.rmtree('/content/drive/MyDrive/Colab Notebooks/qac239/album_covers')  # Remove existing directory
#     shutil.copytree('album_covers', '/content/drive/MyDrive/Colab Notebooks/qac239/album_covers')  # Copy new directory