### Importing libraries

In [3]:
import json
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans

In [4]:
# Authenticate with the client-credentials flow. No id/secret is passed here:
# spotipy reads them from the SPOTIPY_CLIENT_ID and SPOTIPY_CLIENT_SECRET
# environment variables, so credentials never get saved inside the notebook.
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials())

### Defining a function for analyzing playlists

In [9]:
# Column layout of the frame returned by playlist_analyzer. A later cell
# selects the audio-feature columns by position, so this order must not change.
_TRACK_COLUMNS = ("names", "uri", "artists", "popularity")
_AUDIO_FEATURE_COLUMNS = (
    "danceability",
    "energy",
    "key",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
    "duration_ms",
    "time_signature",
)
# spotipy documents a maximum of 100 tracks per audio_features request.
_AUDIO_FEATURES_BATCH = 100


def playlist_analyzer(user, playlist_id):
    """Collect track metadata and audio features for every track of a playlist.

    All pages of the playlist are fetched, then audio features are requested
    in batches instead of one request per track.

    Parameters
    ----------
    user : str
        Passed through to ``spotify.user_playlist_tracks`` as ``user``.
    playlist_id : str
        Playlist id or URI, passed through as ``playlist_id``.

    Returns
    -------
    pandas.DataFrame
        One row per track with the columns ``names``, ``uri``, ``artists``
        (list of artist names), ``popularity`` followed by the 13 audio-feature
        columns, in that order. The frame is empty (but has all columns) when
        the playlist yields no usable track.

    Notes
    -----
    Entries without a track object, local files, and tracks for which the API
    returns no audio features are left out of the result, because they cannot
    provide a complete feature row.
    """
    page = spotify.user_playlist_tracks(user=user, playlist_id=playlist_id)
    items = list(page["items"])

    # Follow the pagination links until the last page has been read.
    while page["next"]:
        page = spotify.next(page)
        items.extend(page["items"])

    # Playlist entries can carry a null track (e.g. removed tracks), and local
    # files have no catalogue entry to look audio features up for.
    tracks = [
        item["track"]
        for item in items
        if item.get("track") and not item.get("is_local")
    ]

    records = []
    for start in range(0, len(tracks), _AUDIO_FEATURES_BATCH):
        batch = tracks[start:start + _AUDIO_FEATURES_BATCH]
        batch_features = spotify.audio_features([track["uri"] for track in batch])

        # Results come back in request order; a missing analysis is None.
        for track, features in zip(batch, batch_features):
            if features is None:
                continue
            record = {
                "names": track["name"],
                "uri": track["uri"],
                "artists": [artist["name"] for artist in track["artists"]],
                "popularity": track["popularity"],
            }
            record.update(
                (column, features[column]) for column in _AUDIO_FEATURE_COLUMNS
            )
            records.append(record)

    # Passing `columns` fixes the column order and keeps the schema for an
    # empty result.
    return pd.DataFrame(
        records, columns=list(_TRACK_COLUMNS + _AUDIO_FEATURE_COLUMNS)
    )

### Getting data from long playlists

In [30]:
# Playlists to analyze, in the order their frames are numbered (df1 .. df11).
PLAYLIST_URIS = [
    "spotify:playlist:6FKDzNYZ8IW1pvYVF4zUN2",
    "spotify:playlist:3goXIN5tAZrFw2gm2wrMlO",
    "spotify:playlist:532F1h299qKD894BlPfJJF",
    "spotify:playlist:32twOqGf8gIswTgzG3IKxP",
    "spotify:playlist:5bZOu1Yz2Xsd3ZB2HFumrv",
    "spotify:playlist:4p4dEXRcGSE0IkSUyCeO8h",
    "spotify:playlist:5Y4rSfjF23SUR4NG50pzsL",
    "spotify:playlist:1XhVM7jWPrGLTiNiAy97Za",
    "spotify:playlist:3nHIMwsBWQnUKkJOEMrGfk",
    "spotify:playlist:6COQUh7Ea221hI5mufpMoX",
    "spotify:playlist:0RA4vJlNcWkp9bjkjm8DS1",
]

# One call per playlist instead of eleven copy-pasted cells. The names
# df1 .. df11 are kept because the concatenation cell refers to them.
df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11 = [
    playlist_analyzer("spotify", playlist_uri) for playlist_uri in PLAYLIST_URIS
]

In [2]:
# Stack the eleven per-playlist frames into one table with a fresh 0..n-1 index.
playlist_df = pd.concat(
    [df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11],
    ignore_index=True,
)

NameError: name 'pd' is not defined

In [1]:
# Show the combined playlist table as this cell's output.
playlist_df

NameError: name 'playlist_df' is not defined

In [36]:
# Keep only the audio-feature columns (danceability through time_signature).
# They are selected by label rather than by position so the selection does not
# silently shift if a column is ever added in front of them.
features_playlist = playlist_df.loc[:, "danceability":"time_signature"]

## Transforming data and creating clusters of the playlist dataframe

In [5]:
# Standardize the audio features: learn the scaling parameters from
# features_playlist and apply them to the same data in a single step.
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features_playlist)

# Wrap the scaled array in a DataFrame (default integer column labels).
features_scaled_df = pd.DataFrame(features_scaled)

NameError: name 'features_playlist' is not defined

In [1]:
# Show the standardized feature table as this cell's output.
features_scaled_df

NameError: name 'features_scaled_df' is not defined

#### KMeans

In [41]:
# Fit k-means on the standardized features and label every track with its cluster.
# n_clusters=8 is scikit-learn's default, written out so the choice is visible.
# random_state is fixed because k-means starts from random centroids; without
# it the cluster numbering and sizes change from run to run.
kmeans = KMeans(n_clusters=8, random_state=42)
kmeans.fit(features_scaled)
clusters = kmeans.predict(features_scaled)

# Adds (or overwrites) the "cluster" column on the combined playlist table.
playlist_df["cluster"] = clusters

In [44]:
# Count how many tracks landed in each cluster, listed by cluster label.
cluster_sizes = pd.Series(clusters).value_counts()
cluster_sizes.sort_index()

0     7718
1    12972
2     9893
3     5447
4     2411
5     8816
6     6582
7     3073
dtype: int64

#### Exporting the tables to CSV

In [None]:
# Write each table to the Datasets folder without the index column.
# NOTE(review): the last file name says "scales" while the variable is
# "scaled" — confirm nothing reads this path before renaming it.
csv_exports = (
    (features_playlist, "../Datasets/features_playlist.csv"),
    (playlist_df, "../Datasets/playlist_df.csv"),
    (features_scaled_df, "../Datasets/features_scales_df.csv"),
)
for table, csv_path in csv_exports:
    table.to_csv(csv_path, index=False)