In [1]:
import os

import pandas as pd
import spotipy
print(spotipy.__version__)

from spotipy.oauth2 import SpotifyClientCredentials
from dotenv import load_dotenv

AttributeError: module 'spotipy' has no attribute '__version__'

In [2]:
# Load environment variables from .env file
load_dotenv()

False

In [3]:
def create_spotify_client(client_id: str, client_secret: str):
    """
    Create a Spotify client using client credentials.
    
    :param client_id: Spotify Client ID
    :param client_secret: Spotify Client Secret
    :return: An authenticated Spotify client
    """
    client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
    return spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [4]:
def extract_new_releases(client_id: str, client_secret: str) -> list[dict]:
    """
    Extract new music releases from Spotify.
    
    :param client_id: Spotify Client ID
    :param client_secret: Spotify Client Secret
    :return: A list of dictionaries containing new releases and their audio features
    """
    sp = create_spotify_client(client_id, client_secret)

    # Fetch the new releases from Spotify
    new_releases = sp.new_releases(limit=50)  # Adjust limit as needed
    data = []

    for album in new_releases['albums']['items']:
        album_info = {
            'album_name': album['name'],
            'album_id': album['id'],
            'release_date': album['release_date'],
            'total_tracks': album['total_tracks'],
            'artist_name': album['artists'][0]['name'],
            'artist_id': album['artists'][0]['id']
        }
        # Fetch audio features for each track in the album
        track_data = sp.album_tracks(album['id'])
        for track in track_data['items']:
            # Fetch audio features
            features = sp.audio_features(track['id'])[0]  # Extract audio features per track
            # Fetch track details to get popularity
            track_details = sp.track(track['id'])  # Fetch detailed track info
            track_info = album_info.copy()  # Copy album data for each track to avoid overwriting
            track_info.update({
                'track_name': track['name'],
                'track_id': track['id'],
                'acousticness': features['acousticness'],
                'danceability': features['danceability'],
                'energy': features['energy'],
                'instrumentalness': features['instrumentalness'],
                'liveness': features['liveness'],
                'loudness': features['loudness'],
                'speechiness': features['speechiness'],
                'tempo': features['tempo'],
                'valence': features['valence'],
                'popularity': track_details['popularity']  # Fetch popularity
            })
            data.append(track_info)
    
    return data

In [5]:
# Main execution block
if __name__ == "__main__":
    client_id = os.getenv("CLIENT_ID")
    client_secret = os.getenv("CLIENT_SECRET")
    
    # Test extraction and convert to DataFrame
    data = extract_new_releases(client_id, client_secret)
    df = pd.DataFrame(data)

    # Display the DataFrame to review the data
    print(df.head())


SpotifyOauthError: No client_id. Pass it or set a SPOTIPY_CLIENT_ID environment variable.

In [11]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
total_tracks,302.0,15.324503,9.481613,1.0,9.0,16.0,21.0,34.0
acousticness,302.0,0.316032,0.28173,3.4e-05,0.061475,0.2465,0.5365,0.969
danceability,302.0,0.576356,0.138328,0.0866,0.487,0.573,0.669,0.952
energy,302.0,0.598179,0.196041,0.104,0.4465,0.6115,0.74975,0.992
instrumentalness,302.0,0.11429,0.266132,0.0,0.0,6e-06,0.010175,0.947
liveness,302.0,0.221114,0.188345,0.0408,0.107,0.1375,0.2645,0.992
loudness,302.0,-8.495914,4.914105,-25.187,-9.19925,-6.8265,-5.39,-2.889
speechiness,302.0,0.106826,0.126473,0.0235,0.033325,0.04415,0.1205,0.772
tempo,302.0,120.263944,30.256607,58.417,95.33875,119.9685,140.95825,219.993
valence,302.0,0.442038,0.216202,0.0335,0.271,0.4265,0.5985,0.965


In [7]:
def data_integrity_checks(df: pd.DataFrame):
    """
    Run a series of data integrity checks on a DataFrame.
    
    :param df: The DataFrame to check
    """
    print("DataFrame Info:")
    print(df.info())
    
    print("\nDataFrame Description:")
    print(df.describe())
    
    print("\nChecking for Null Values:")
    null_counts = df.isnull().sum()
    print(null_counts[null_counts > 0])  # Show only columns with null values
    
    print("\nChecking for Duplicates:")
    duplicates = df.duplicated().sum()
    print(f"Number of duplicate rows: {duplicates}")

    print("\nData Types of Each Column:")
    print(df.dtypes)

    print("\nUnique Values Count in Each Column:")
    unique_counts = df.nunique()
    print(unique_counts)

    print("\nFirst Few Rows of the DataFrame:")
    print(df.head())

    print("\nLast Few Rows of the DataFrame:")
    print(df.tail())

# # Example usage
# if __name__ == "__main__":
#     # Assuming `df` is your extracted DataFrame
#     data = extract_new_releases(client_id, client_secret)
#     df = pd.DataFrame(data)

#     # Run data integrity checks
#     data_integrity_checks(df)


In [8]:
data_integrity_checks(df)

DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 302 entries, 0 to 301
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   album_name        302 non-null    object 
 1   album_id          302 non-null    object 
 2   release_date      302 non-null    object 
 3   total_tracks      302 non-null    int64  
 4   artist_name       302 non-null    object 
 5   artist_id         302 non-null    object 
 6   track_name        302 non-null    object 
 7   track_id          302 non-null    object 
 8   acousticness      302 non-null    float64
 9   danceability      302 non-null    float64
 10  energy            302 non-null    float64
 11  instrumentalness  302 non-null    float64
 12  liveness          302 non-null    float64
 13  loudness          302 non-null    float64
 14  speechiness       302 non-null    float64
 15  tempo             302 non-null    float64
 16  valence           302 non-nu