In [3]:
import pandas as pd
# !pip install spotipy
import spotipy
# !pip install dask
import dask.dataframe as dd
from spotipy.oauth2 import SpotifyClientCredentials
# from google.colab import userdata
# from google.colab import drive
import time

# drive.mount('/content/drive')

In [4]:
def load_data(file_path):
    return dd.read_csv(file_path)


In [5]:
def authenticate_spotify():
    client_id = userdata.get('spotify_client_id')
    client_secret = userdata.get('spotify_client_secret')

    if client_id and client_secret:
        credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
        sp = spotipy.Spotify(client_credentials_manager=credentials_manager)
        print("Spotify authentication successful.")
        return sp
    else:
        print("Failed to authenticate with Spotify.")
        return None

In [6]:
def get_artist_genre(sp, artist_name):
    try:
        if sp:
            results = sp.search(q=artist_name, type='artist', limit=1)
            if results['artists']['items']:
                artist = results['artists']['items'][0]
                genres = artist['genres']
                return genres[0] if genres else 'Unknown'
    except Exception as e:
        print(f"Failed to get genre for artist {artist_name}: {e}")
    return 'Unknown'

In [7]:
def get_artist_info(sp, artist_name):
    try:
        if sp:
            results = sp.search(q=artist_name, type='artist', limit=1)
            if results['artists']['items']:
                artist = results['artists']['items'][0]
                artist_id = artist['id']
                artist_info = sp.artist(artist_id)
                popularity = artist_info['popularity']
                followers = artist_info['followers']['total']
                return popularity, followers
    except Exception as e:
        print(f"Failed to get info for artist {artist_name}: {e}")
    return None, None

In [8]:
def get_track_info(sp, track_name, artist_name):
    try:
        if sp:
            results = sp.search(q=f'track:{track_name} artist:{artist_name}', type='track', limit=1)
            if results['tracks']['items']:
                track = results['tracks']['items'][0]
                track_id = track['id']
                track_info = sp.track(track_id)
                popularity = track_info['popularity']
                audio_features = sp.audio_features([track_id])[0]
                return popularity, audio_features
    except Exception as e:
        print(f"Failed to get info for track {track_name} by artist {artist_name}: {e}")
    return None, None

In [9]:
def apply_functions(df, sp):
    df['genre'] = df['artname'].map_partitions(lambda pdf: pdf.apply(lambda row: get_artist_genre(sp, row['artname'])))
    artist_cols = ['artist_popularity', 'artist_followers']
    temp_df = df[['artname'] + artist_cols].copy()
    df[artist_cols] = df[['artname'] + artist_cols].apply(lambda row: get_artist_info(sp, row['artname']), axis=1).apply(pd.Series)
    track_cols = ['track_popularity', 'audio_features']
    df = df.dropna(subset=artist_cols, how='all').reset_index(drop=True)
    combined_cols = artist_cols + track_cols
    df = pd.concat([df.drop(columns=combined_cols), df[combined_cols]], axis=1)
    df[track_cols] = df.apply(lambda row: get_track_info(sp, row['track_name'], row['artname']), axis=1).apply(pd.Series)
    return df

In [10]:
def save_data(df, path):
    df.compute().to_csv(path, index=False)

In [11]:
if __name__ == '__main__':
    file_path = '/content/drive/My Drive/Colab Notebooks/dataset.csv'
    output_path = '/content/drive/My Drive/Colab Notebooks/dataset_modified.csv'
    sp = authenticate_spotify()
    ddf = load_data(file_path)
    df = apply_functions(ddf, sp)
    save_data(df, output_path)


Spotify authentication successful.


ValueError: Metadata inference failed in `lambda`.

You have supplied a custom function and Dask is unable to 
determine the type of output that that function returns. 

To resolve this please provide a meta= keyword.
The docstring of the Dask function you ran should have more information.

Original error is below:
------------------------
TypeError('string indices must be integers')

Traceback:
---------
  File "/usr/local/lib/python3.10/dist-packages/dask/dataframe/utils.py", line 193, in raise_on_meta_error
    yield
  File "/usr/local/lib/python3.10/dist-packages/dask/dataframe/core.py", line 6893, in _emulate
    return func(*_extract_meta(args, True), **_extract_meta(kwargs, True))
  File "<ipython-input-9-f9db68f5f088>", line 2, in <lambda>
    df['genre'] = df['artname'].map_partitions(lambda pdf: pdf.apply(lambda row: get_artist_genre(sp, row['artname'])))
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/series.py", line 4630, in apply
    return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/apply.py", line 1025, in apply
    return self.apply_standard()
  File "/usr/local/lib/python3.10/dist-packages/pandas/core/apply.py", line 1076, in apply_standard
    mapped = lib.map_infer(
  File "pandas/_libs/lib.pyx", line 2834, in pandas._libs.lib.map_infer
  File "<ipython-input-9-f9db68f5f088>", line 2, in <lambda>
    df['genre'] = df['artname'].map_partitions(lambda pdf: pdf.apply(lambda row: get_artist_genre(sp, row['artname'])))
