In [None]:
# %pip install python3-discogs-client
# %pip install xlrd openpyxl

import discogs_client
import pandas as pd
import time
import os

# Configuration
# Discogs personal access token. It is taken from the DISCOGS_TOKEN
# environment variable when that is set, so the secret does not have to be
# written into this file; otherwise it falls back to the empty string.
DISCOGS_TOKEN = os.environ.get("DISCOGS_TOKEN", "")
# Spreadsheet that is read as input.
INPUT_FILE = 'koah_capas_jazz.xls'
# Text file used to remember how far a previous, interrupted run got.
CHECKPOINT_FILE = 'koah_capas_jazz_last_processed_line.txt'
# Spreadsheet the results are written to.
OUTPUT_FILE = 'koah_capas_jazz.xlsx'

# Initialise the Discogs client
dc = discogs_client.Client('MeuApp/1.0', user_token=DISCOGS_TOKEN)

# Read the Excel file
df = pd.read_excel(INPUT_FILE)

# Função para buscar a capa de um álbum
def _clean_search_term(value):
    """Return *value* as a stripped string, or "" when it is missing (None/NaN)."""
    if pd.isna(value):
        return ""
    return str(value).strip()


def get_album_cover(album_name, artist_name):
    """Search Discogs for a release and return the URI of its first image.

    Args:
        album_name: Album title to search for. A missing value (None, NaN,
            empty or whitespace-only) means there is nothing to look up.
        artist_name: Artist name appended to the query. May be missing, in
            which case only the album title is searched.

    Returns:
        The ``'uri'`` of the first image of the first search result;
        ``"Capa não encontrada"`` when there is no album name, no result, or
        the first result has no images; ``"Erro na busca"`` when the lookup
        raised an exception.
    """
    album = _clean_search_term(album_name)
    artist = _clean_search_term(artist_name)

    if not album:
        # Without a title the query would be the literal text "nan" (or just
        # the artist); skip the request, and the rate-limit pause with it.
        return "Capa não encontrada"

    # Built outside the ``try`` so the error message below can always use it.
    search_query = f"{album} {artist}" if artist else album

    try:
        results = dc.search(search_query, type='release')
        if results:
            images = results[0].images
            if images:
                return images[0]['uri']
        return "Capa não encontrada"
    except Exception as e:
        # Best effort: one failed lookup must not abort the whole batch.
        print(f"Erro ao buscar {search_query}: {e}")
        return "Erro na busca"
    finally:
        time.sleep(1.1)  # Stay under the limit of 60 requests per minute


# Get column names (looked up before the result column is added)
album_col = df.columns[2]
artist_col = df.columns[3]
print(f"Using columns: Album = '{album_col}', Artist = '{artist_col}'")

# Read the checkpoint: the number of rows already processed, which is also
# the 0-based position of the next row to process.
start_line = 0
if os.path.exists(CHECKPOINT_FILE):
    with open(CHECKPOINT_FILE, 'r') as f:
        raw_checkpoint = f.read().strip()
    try:
        start_line = int(raw_checkpoint)
    except ValueError:
        print(f"Ignoring unreadable checkpoint {raw_checkpoint!r}")
        start_line = 0
    # Keep the resume position inside the table.
    start_line = max(0, min(start_line, len(df)))

# Create the result column up front with an object dtype so it can hold text.
if 'Cover_URL' not in df.columns:
    df['Cover_URL'] = None

# df was just reloaded from the input workbook, so covers fetched by the
# interrupted run exist only in OUTPUT_FILE. Bring them back before resuming,
# otherwise the next save would overwrite them with blanks.
if start_line > 0:
    restored = False
    if os.path.exists(OUTPUT_FILE):
        previous = pd.read_excel(OUTPUT_FILE)
        if 'Cover_URL' in previous.columns and len(previous) == len(df):
            df['Cover_URL'] = previous['Cover_URL'].astype(object).to_numpy()
            restored = True
    if restored:
        print(f"Resuming from line {start_line + 1}")
    else:
        print("Previous results could not be restored; starting from line 1")
        start_line = 0

# Fetch the covers for every remaining album
remaining_rows = df.iloc[start_line:].iterrows()
for position, (index, row) in enumerate(remaining_rows, start=start_line):
    line_number = position + 1
    print(f"Processing line {line_number}...")
    df.at[index, 'Cover_URL'] = get_album_cover(row[album_col], row[artist_col])

    if line_number % 10 == 0:
        print(f"Saving checkpoint at line {line_number}")
        # Save the data first so the checkpoint never points past what is
        # actually on disk.
        df.to_excel(OUTPUT_FILE, index=False)
        with open(CHECKPOINT_FILE, 'w') as f:
            f.write(str(line_number))

# Final save: rows processed after the last checkpoint are not on disk yet.
df.to_excel(OUTPUT_FILE, index=False)

# Clear checkpoint file after successful completion
if os.path.exists(CHECKPOINT_FILE):
    os.remove(CHECKPOINT_FILE)

# Show the results
for line_number, (_, row) in enumerate(df.iterrows(), start=1):
    print(f"Line {line_number} - {row[album_col]} - {row[artist_col]}: {row['Cover_URL']}")
