In [86]:
import json
import pandas as pd
import numpy as np
import requests
import os
import time

In [7]:
#defining the path to the .json files.
data_path = "MyData/"

In [15]:
#iterate through the files in the folder and load the JSON data
data = []

#in this code, os.listdir() function retrieves a list of all files and folders in the specified directory.
#The os.path.join() function creates the complete file path by joining the folder path and the file name.
#because there is also a pdf file in the folder, I used the 'if filename.endswith("json")' condition to ensure that only the JSON files are being considered.
#finally, the JSON data is loaded using 'json.load()' and added to the 'data' list
for filename in os.listdir(data_path):
    if "Audio" in filename and filename.endswith(".json"): #a parameter for this project: only consider audio streaming history, not video
        file_path = os.path.join(data_path, filename)
        with open(file_path, "r") as file:
            json_data = json.load(file)
            data.extend(json_data)


In [16]:
#check if data is populated with JSON file contents
data

[{'ts': '2018-10-26T15:59:55Z',
  'username': 'shadrina.izzati',
  'platform': 'Android OS 7.0 API 24 (samsung, SM-G9287C)',
  'ms_played': 206866,
  'conn_country': 'JP',
  'ip_addr_decrypted': '39.110.202.5',
  'user_agent_decrypted': 'unknown',
  'master_metadata_track_name': 'Superman',
  'master_metadata_album_artist_name': 'Joe Brooks',
  'master_metadata_album_album_name': 'Constellation Me',
  'spotify_track_uri': 'spotify:track:6A3Ohyg6CzYpnAPdrh6cGw',
  'episode_name': None,
  'episode_show_name': None,
  'spotify_episode_uri': None,
  'reason_start': 'appload',
  'reason_end': 'trackdone',
  'shuffle': True,
  'skipped': None,
  'offline': False,
  'offline_timestamp': 1540569386791,
  'incognito_mode': False},
 {'ts': '2018-10-26T16:03:41Z',
  'username': 'shadrina.izzati',
  'platform': 'Android OS 7.0 API 24 (samsung, SM-G9287C)',
  'ms_played': 227500,
  'conn_country': 'JP',
  'ip_addr_decrypted': '39.110.202.5',
  'user_agent_decrypted': 'unknown',
  'master_metadata_t

In [18]:
#there are multiple JSON files separated by year. To check if all the files have been loaded

years = set()

for item in data:
    ts = item['ts']
    year = ts[:4] #extract year from timestamp
    years.add(year)

In [19]:
#years of streaming history included in this data
for year in years:
    print(year)

2016
2019
2023
2017
2022
2021
2020
2018


In [20]:
#number of data entries available
num_items = len(data)
print(num_items)

59782


In [21]:
#number of entries in each year
year_counts = {}

In [22]:
for item in data:
    ts = item['ts']
    year = ts[:4] #extract year from timestamp

    if year in year_counts:
        year_counts[year] += 1
    else:
        year_counts[year] = 1

for year, count in year_counts.items():
    print(f"Year: {year}, Count: {count}")

Year: 2018, Count: 6628
Year: 2019, Count: 11239
Year: 2020, Count: 6354
Year: 2016, Count: 6823
Year: 2017, Count: 6284
Year: 2022, Count: 11052
Year: 2023, Count: 2750
Year: 2021, Count: 8652


In [28]:
#read Streaming_History_Audio files using pandas
df_stream0 = pd.read_json('MyData/Streaming_History_Audio_2016-2018_0.json')
df_stream1 = pd.read_json('MyData/Streaming_History_Audio_2018-2020_1.json')
df_stream2 = pd.read_json('MyData/Streaming_History_Audio_2020-2022_2.json')
df_stream3 = pd.read_json('MyData/Streaming_History_Audio_2022-2023_3.json')

#merge streaming dataframes
df_stream = pd.concat([df_stream0, df_stream1, df_stream2, df_stream3])

#create a 'UniqueID' for each song by combining the fields 'artistName' and 'trackName'
df_stream['UniqueID'] = df_stream['master_metadata_album_artist_name'] + ":" + df_stream['master_metadata_track_name']

df_stream.head()

Unnamed: 0,ts,username,platform,ms_played,conn_country,ip_addr_decrypted,user_agent_decrypted,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,...,episode_show_name,spotify_episode_uri,reason_start,reason_end,shuffle,skipped,offline,offline_timestamp,incognito_mode,UniqueID
0,2016-04-08T05:35:13Z,shadrina.izzati,"Android OS 5.0 API 21 (samsung, SM-G900H)",257641,ID,202.62.16.140,unknown,Open Road,Roo Panes,Weight of Your World EP,...,,,clickrow,fwdbtn,False,,False,0,False,Roo Panes:Open Road
1,2016-04-08T05:35:16Z,shadrina.izzati,"Android OS 5.0 API 21 (samsung, SM-G900H)",2799,ID,202.62.16.140,unknown,Shelter,Ray LaMontagne,Trouble,...,,,fwdbtn,fwdbtn,False,,False,0,False,Ray LaMontagne:Shelter
2,2016-04-08T05:37:33Z,shadrina.izzati,"Android OS 5.0 API 21 (samsung, SM-G900H)",137393,ID,202.62.16.140,unknown,When The Stars Go Blue,Ryan Adams,Gold,...,,,fwdbtn,fwdbtn,False,,False,0,False,Ryan Adams:When The Stars Go Blue
3,2016-04-08T05:37:57Z,shadrina.izzati,"Android OS 5.0 API 21 (samsung, SM-G900H)",23796,ID,202.62.16.140,unknown,The Breach,Dustin Tebbutt,The Breach EP,...,,,fwdbtn,fwdbtn,False,,False,0,False,Dustin Tebbutt:The Breach
4,2016-04-08T05:38:00Z,shadrina.izzati,"Android OS 5.0 API 21 (samsung, SM-G900H)",1871,ID,202.62.16.140,unknown,On Trees And Birds And Fire,I Am Oak,On Claws,...,,,fwdbtn,fwdbtn,False,,False,0,False,I Am Oak:On Trees And Birds And Fire


In [37]:
#add column with track URI stripped of 'spotify:track:'
uri = df_stream['spotify_track_uri'].str.split(":", expand=True)
ep_uri = df_stream['spotify_episode_uri'].str.split(":", expand=True)
df_stream['track_uri'] = uri[2]
df_stream['episode_uri'] = ep_uri[2]

df_stream.head()


Unnamed: 0,ts,username,conn_country,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,episode_name,episode_show_name,spotify_episode_uri,reason_start,reason_end,shuffle,skipped,UniqueID,track_uri,episode_uri
0,2016-04-08T05:35:13Z,shadrina.izzati,ID,Open Road,Roo Panes,Weight of Your World EP,spotify:track:1IycYHHYjKgxvB8AHCdu7O,,,,clickrow,fwdbtn,False,,Roo Panes:Open Road,1IycYHHYjKgxvB8AHCdu7O,
1,2016-04-08T05:35:16Z,shadrina.izzati,ID,Shelter,Ray LaMontagne,Trouble,spotify:track:3V4GBwxo5UjAAjXbYES3x1,,,,fwdbtn,fwdbtn,False,,Ray LaMontagne:Shelter,3V4GBwxo5UjAAjXbYES3x1,
2,2016-04-08T05:37:33Z,shadrina.izzati,ID,When The Stars Go Blue,Ryan Adams,Gold,spotify:track:0nOe9N3bENgpIGpRecUVnZ,,,,fwdbtn,fwdbtn,False,,Ryan Adams:When The Stars Go Blue,0nOe9N3bENgpIGpRecUVnZ,
3,2016-04-08T05:37:57Z,shadrina.izzati,ID,The Breach,Dustin Tebbutt,The Breach EP,spotify:track:1WXVfFRD89EtVURD4Tbixo,,,,fwdbtn,fwdbtn,False,,Dustin Tebbutt:The Breach,1WXVfFRD89EtVURD4Tbixo,
4,2016-04-08T05:38:00Z,shadrina.izzati,ID,On Trees And Birds And Fire,I Am Oak,On Claws,spotify:track:3YnzkpXK1J9fnnoJRG8FUB,,,,fwdbtn,fwdbtn,False,,I Am Oak:On Trees And Birds And Fire,3YnzkpXK1J9fnnoJRG8FUB,


In [45]:
#clean up dataframe to only include necessary data
#specify unnecessary columns
columns_to_remove = ['platform', 'ms_played', 'ip_addr_decrypted', 'user_agent_decrypted', 'offline', 'offline_timestamp','incognito_mode', 'username']

#remove the 
df_stream = df_stream[[col for col in df_stream.columns if col not in columns_to_remove]]

In [46]:
df_stream.head()

Unnamed: 0,ts,conn_country,master_metadata_track_name,master_metadata_album_artist_name,master_metadata_album_album_name,spotify_track_uri,episode_name,episode_show_name,spotify_episode_uri,reason_start,reason_end,shuffle,skipped,UniqueID,track_uri,episode_uri
0,2016-04-08T05:35:13Z,ID,Open Road,Roo Panes,Weight of Your World EP,spotify:track:1IycYHHYjKgxvB8AHCdu7O,,,,clickrow,fwdbtn,False,,Roo Panes:Open Road,1IycYHHYjKgxvB8AHCdu7O,
1,2016-04-08T05:35:16Z,ID,Shelter,Ray LaMontagne,Trouble,spotify:track:3V4GBwxo5UjAAjXbYES3x1,,,,fwdbtn,fwdbtn,False,,Ray LaMontagne:Shelter,3V4GBwxo5UjAAjXbYES3x1,
2,2016-04-08T05:37:33Z,ID,When The Stars Go Blue,Ryan Adams,Gold,spotify:track:0nOe9N3bENgpIGpRecUVnZ,,,,fwdbtn,fwdbtn,False,,Ryan Adams:When The Stars Go Blue,0nOe9N3bENgpIGpRecUVnZ,
3,2016-04-08T05:37:57Z,ID,The Breach,Dustin Tebbutt,The Breach EP,spotify:track:1WXVfFRD89EtVURD4Tbixo,,,,fwdbtn,fwdbtn,False,,Dustin Tebbutt:The Breach,1WXVfFRD89EtVURD4Tbixo,
4,2016-04-08T05:38:00Z,ID,On Trees And Birds And Fire,I Am Oak,On Claws,spotify:track:3YnzkpXK1J9fnnoJRG8FUB,,,,fwdbtn,fwdbtn,False,,I Am Oak:On Trees And Birds And Fire,3YnzkpXK1J9fnnoJRG8FUB,


In [47]:
#rename columns to make them easier to understand
column_name_mapping = {
    'conn_country': 'stream_location',
    'ts': 'timestamp',
    'master_metadata_track_name': 'track_name',
    'master_metadata_album_artist_name': 'artist_name',
    'master_metadata_album_album_name': 'album_name',
    'episode_show_name': 'podcast_name'
}

df_stream = df_stream.rename(columns=column_name_mapping)

df_stream.head()

Unnamed: 0,timestamp,stream_location,track_name,artist_name,album_name,spotify_track_uri,episode_name,podcast_name,spotify_episode_uri,reason_start,reason_end,shuffle,skipped,UniqueID,track_uri,episode_uri
0,2016-04-08T05:35:13Z,ID,Open Road,Roo Panes,Weight of Your World EP,spotify:track:1IycYHHYjKgxvB8AHCdu7O,,,,clickrow,fwdbtn,False,,Roo Panes:Open Road,1IycYHHYjKgxvB8AHCdu7O,
1,2016-04-08T05:35:16Z,ID,Shelter,Ray LaMontagne,Trouble,spotify:track:3V4GBwxo5UjAAjXbYES3x1,,,,fwdbtn,fwdbtn,False,,Ray LaMontagne:Shelter,3V4GBwxo5UjAAjXbYES3x1,
2,2016-04-08T05:37:33Z,ID,When The Stars Go Blue,Ryan Adams,Gold,spotify:track:0nOe9N3bENgpIGpRecUVnZ,,,,fwdbtn,fwdbtn,False,,Ryan Adams:When The Stars Go Blue,0nOe9N3bENgpIGpRecUVnZ,
3,2016-04-08T05:37:57Z,ID,The Breach,Dustin Tebbutt,The Breach EP,spotify:track:1WXVfFRD89EtVURD4Tbixo,,,,fwdbtn,fwdbtn,False,,Dustin Tebbutt:The Breach,1WXVfFRD89EtVURD4Tbixo,
4,2016-04-08T05:38:00Z,ID,On Trees And Birds And Fire,I Am Oak,On Claws,spotify:track:3YnzkpXK1J9fnnoJRG8FUB,,,,fwdbtn,fwdbtn,False,,I Am Oak:On Trees And Birds And Fire,3YnzkpXK1J9fnnoJRG8FUB,


In [48]:
#separate dataframe just for podcast data
podcast_df = df_stream[df_stream['episode_name'].notnull()]

podcast_df.head()

Unnamed: 0,timestamp,stream_location,track_name,artist_name,album_name,spotify_track_uri,episode_name,podcast_name,spotify_episode_uri,reason_start,reason_end,shuffle,skipped,UniqueID,track_uri,episode_uri
13804,2018-04-04T04:34:09Z,ID,,,,,Ellen DeGeneres,Armchair Expert with Dax Shepard,spotify:episode:4ZwVopjfEfb8nVn81nmLqf,clickrow,endplay,False,,,,4ZwVopjfEfb8nVn81nmLqf
13805,2018-04-04T11:38:18Z,ID,,,,,Kristen Bell,Armchair Expert with Dax Shepard,spotify:episode:7d3NdDcjg1jQ7QBIt5gdpC,clickrow,logout,False,,,,7d3NdDcjg1jQ7QBIt5gdpC
13806,2018-04-04T12:17:20Z,ID,,,,,Kristen Bell,Armchair Expert with Dax Shepard,spotify:episode:7d3NdDcjg1jQ7QBIt5gdpC,appload,endplay,False,,,,7d3NdDcjg1jQ7QBIt5gdpC
13807,2018-04-05T02:11:46Z,ID,,,,,Opioids: How America Got Hooked,Science Vs,spotify:episode:4KhVSxUoPVCrU3G4CvYP6Y,clickrow,logout,False,,,,4KhVSxUoPVCrU3G4CvYP6Y
13816,2018-04-05T11:29:21Z,ID,,,,,Episode #019 ... Three Islamic Truths,Philosophize This!,spotify:episode:0UvBInfhrRRnqhGoYz8q3N,clickrow,unexpected-exit-while-paused,False,,,,0UvBInfhrRRnqhGoYz8q3N


In [66]:
#separate dataframe just for music/tracklists
tracklist_df = df_stream[df_stream['track_name'].notnull()]

tracklist_df.head()

Unnamed: 0,timestamp,stream_location,track_name,artist_name,album_name,spotify_track_uri,episode_name,podcast_name,spotify_episode_uri,reason_start,reason_end,shuffle,skipped,UniqueID,track_uri,episode_uri
0,2016-04-08T05:35:13Z,ID,Open Road,Roo Panes,Weight of Your World EP,spotify:track:1IycYHHYjKgxvB8AHCdu7O,,,,clickrow,fwdbtn,False,,Roo Panes:Open Road,1IycYHHYjKgxvB8AHCdu7O,
1,2016-04-08T05:35:16Z,ID,Shelter,Ray LaMontagne,Trouble,spotify:track:3V4GBwxo5UjAAjXbYES3x1,,,,fwdbtn,fwdbtn,False,,Ray LaMontagne:Shelter,3V4GBwxo5UjAAjXbYES3x1,
2,2016-04-08T05:37:33Z,ID,When The Stars Go Blue,Ryan Adams,Gold,spotify:track:0nOe9N3bENgpIGpRecUVnZ,,,,fwdbtn,fwdbtn,False,,Ryan Adams:When The Stars Go Blue,0nOe9N3bENgpIGpRecUVnZ,
3,2016-04-08T05:37:57Z,ID,The Breach,Dustin Tebbutt,The Breach EP,spotify:track:1WXVfFRD89EtVURD4Tbixo,,,,fwdbtn,fwdbtn,False,,Dustin Tebbutt:The Breach,1WXVfFRD89EtVURD4Tbixo,
4,2016-04-08T05:38:00Z,ID,On Trees And Birds And Fire,I Am Oak,On Claws,spotify:track:3YnzkpXK1J9fnnoJRG8FUB,,,,fwdbtn,fwdbtn,False,,I Am Oak:On Trees And Birds And Fire,3YnzkpXK1J9fnnoJRG8FUB,


In [53]:
#we have our data ready. But if we have data about the tracks from spotify, it would make the data richer and better.
#To interact with Spotify API we need an Auth Token, which we can get using client ID and client secret from Spotify, we store our IDs from Spotify Developer Dashboard here

CLIENT_ID = '7ce6ca33cbd345698ef46fbbb7c3c9f6'
CLIENT_SECRET = '1bdf6b3c7beb4392a9a7df18ccf81b2a'
#note: keep this secret by storing values in separate file that we don't upload.

In [217]:
#GENERATE ACCESS TOKEN

# authentication URL
AUTH_URL = 'https://accounts.spotify.com/api/token'

# POST
auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
})

#convert the response to JSON
auth_response_data = auth_response.json()

#save access token
access_token = auth_response_data['access_token']

In [218]:
#used for authenticating all API calls
headers = {'Authorization': 'Bearer {token}'.format(token=access_token)}

#base URL for all Spotify API endpoints
BASE_URL = 'https://api.spotify.com/v1/'

In [219]:
#create blank dictionary to store track URI, artist URI, and genres
feature_dict = {}

#convert track_uri column to iterable list
track_uris = tracklist_df['track_uri'].to_list()

In [220]:
#remove duplicates in track list
track_uris = list(set(track_uris))

In [222]:
# Function to get a new access token
def refresh_token():
    # Perform the necessary steps to refresh the token
    auth_response = requests.post(AUTH_URL, {
    'grant_type': 'client_credentials',
    'client_id': CLIENT_ID,
    'client_secret': CLIENT_SECRET,
})
    auth_response_data = auth_response.json()
    # and return the new access token
    new_token = auth_response_data['access_token']
    return new_token

# Split the track URIs into batches of 5
batches = [track_uris[i:i+30] for i in range(0, len(track_uris), 30)]
count = 1

for batch in batches:
    # Check if it's time to refresh the token
    
    if access_token is None:
        access_token = refresh_token()
        print('Token refreshed.')

    headers = {'Authorization': 'Bearer {token}'.format(token=access_token)}

    #Create a comma-separated string of the track URIs in the batch
    uris = ','.join(batch)

    print(f"batch #{count}: start request for {uris}")

    time.sleep(0.5)  # Delay of 2 seconds between requests

    r = requests.get(BASE_URL + 'tracks?ids=' + uris, headers=headers)
    if r.status_code != 200:
        print('Error:', r.status_code, r.content)
        if r.status_code == 401:
            refresh_token()
    else:
        r = r.json()
        if r is not None and 'tracks' in r:
            for track in r['tracks']:
                if track is not None and 'uri' in track:
                    t_uri = track['uri']
                    feature_dict[t_uri] = {
                    'popularity': track['popularity'],
                    'danceability': 0,
                    'energy': 0,
                    'speechiness': 0,
                    'instrumentalness': 0,
                    'liveness': 0,
                    'loudness': 0,
                    'speechiness': 0,
                    'tempo': 0,
                    'valence': 0
                }

         # Check if 'album' exists and is a dictionary
                    if 'album' in track and isinstance(track['album'], dict):
                        # Check if 'genres' exist in the track's 'album' object
                        if 'genres' in track['album']:
                            feature_dict[t_uri]['genre'] = track['album']['genres']
                        else:
                            # Check if 'artists' exist and is a list with at least one artist
                            if 'artists' in track and isinstance(track['artists'], list) and track['artists']:
                                # Check if 'genres' exist in the first artist's object
                                if 'genres' in track['artists'][0]:
                                    feature_dict[t_uri]['genre'] = track['artists'][0]['genres']
                                else:
                                    feature_dict[t_uri]['genre'] = []
                            else:
                                feature_dict[t_uri]['genre'] = []
                    else:
                        feature_dict[t_uri]['genre'] = []

    time.sleep(1)  # Delay of 2 seconds between requests

    s = requests.get(BASE_URL + 'audio-features?ids=' + uris, headers=headers)
    if s.status_code != 200:
        print('Error:', s.status_code, s.content)
        if s.status_code == 401:
            refresh_token()
    else:
        s = s.json()
        if s is not None and 'audio_features' in s:
            for audio_feature in s['audio_features']:
                if audio_feature is not None and 'uri' in audio_feature:
                    t_uri = audio_feature['uri']
                    if t_uri in feature_dict:
                        feature_dict[t_uri]['danceability'] = audio_feature['danceability']
                        feature_dict[t_uri]['energy'] = audio_feature['energy']
                        feature_dict[t_uri]['speechiness'] = audio_feature['speechiness']
                        feature_dict[t_uri]['instrumentalness'] = audio_feature['instrumentalness']
                        feature_dict[t_uri]['liveness'] = audio_feature['liveness'],
                        feature_dict[t_uri]['loudness'] = audio_feature['loudness']
                        feature_dict[t_uri]['speechiness'] = audio_feature['speechiness']
                        feature_dict[t_uri]['tempo'] = audio_feature['tempo']
                        feature_dict[t_uri]['valence'] = audio_feature['valence']

    time.sleep(1)  # Delay of 2 seconds between requests

    count += 1

    print(f"completed request for {uris}")

batch #1: start request for 4zGtsXOEpHDfEIZvbn5wnz,5J8oPTjP3pqAG61p2IlksY,0axZ1pkds89AVTvko3zg7z,3aIwehAR8RzmP7HEqX5zR8,7An9jFpCwjxvSzalvQICBd,76YNvkC0hNHWVFRXU7IgiQ,6WYsKS5JmGzlDdPbD4gwhw,0NuWgxEp51CutD2pJoF4OM,2fPb58e6f8KxejYDCaARwS,74YrN8gcU3WFnG3Fu99Eqk,2RTlhFU9bksuYdA8ROVdmj,58ZKw23U9pY7k5ZgS30CAH,1RRZSm4akqNyMOsPUhw4cb,6KMDzk8RU7ZOvEIr1tCvRS,6ggCUjC5j2uhighYnTee8X,6rTPgxS3weamFQtN6xN4KZ,49uFhk5Ka3CHVA1Gg6gvn0,3opafmcv7GdeqgSbFXf9AF,07M76e7IXxYdnC1BBiJxEX,6JGRktSYyPOqIlUqxuEKQD,6nGeohj2m2R3SJMe09eAex,2VMHjC3ALWlCHf59kocX34,0aehdLkMY8nYrTGrncpDnY,70eFcWOvlMObDhURTqT4Fv,70khXICDeTTxgYtw3EysKH,63AbhsGQUdQVyA9vn65pUy,2ohegz9maxzroKBu9YhcCM,4KCGYOHGExcKUlFkNajeVO,4fObHdti4br5jqO5iygxef,5qioxs2Zkb3V5WtOoMHT64
completed request for 4zGtsXOEpHDfEIZvbn5wnz,5J8oPTjP3pqAG61p2IlksY,0axZ1pkds89AVTvko3zg7z,3aIwehAR8RzmP7HEqX5zR8,7An9jFpCwjxvSzalvQICBd,76YNvkC0hNHWVFRXU7IgiQ,6WYsKS5JmGzlDdPbD4gwhw,0NuWgxEp51CutD2pJoF4OM,2fPb58e6f8KxejYDCaARwS,74YrN8gcU3WFnG3Fu99Eqk,2RTlhFU9bksuYdA8ROVdmj,58ZKw23

In [189]:
print(f"in feature_dict: {len(feature_dict)} tracks found. In track_uris: {len(track_uris)} tracks found")

in feature_dict: 12602 tracks found. In track_uris: 12603 tracks found


In [227]:
print(dict(list(feature_dict.items())[0: 5]))

{'spotify:track:4zGtsXOEpHDfEIZvbn5wnz': {'popularity': 42, 'danceability': 0.466, 'energy': 0.142, 'speechiness': 0.106, 'instrumentalness': 0.912, 'liveness': (0.115,), 'loudness': -17.208, 'tempo': 97.448, 'valence': 0.299, 'genre': []}, 'spotify:track:5J8oPTjP3pqAG61p2IlksY': {'popularity': 27, 'danceability': 0.276, 'energy': 0.579, 'speechiness': 0.0282, 'instrumentalness': 0.00269, 'liveness': (0.118,), 'loudness': -4.918, 'tempo': 76.508, 'valence': 0.146, 'genre': []}, 'spotify:track:0axZ1pkds89AVTvko3zg7z': {'popularity': 42, 'danceability': 0.319, 'energy': 0.753, 'speechiness': 0.0457, 'instrumentalness': 0, 'liveness': (0.0621,), 'loudness': -4.745, 'tempo': 93.806, 'valence': 0.579, 'genre': []}, 'spotify:track:3aIwehAR8RzmP7HEqX5zR8': {'popularity': 40, 'danceability': 0.514, 'energy': 0.00458, 'speechiness': 0.0545, 'instrumentalness': 0.945, 'liveness': (0.0743,), 'loudness': -31.894, 'tempo': 70.884, 'valence': 0.135, 'genre': []}, 'spotify:track:7An9jFpCwjxvSzalvQICB

In [225]:
# Convert dictionary to dataframe
feature_df = pd.DataFrame.from_dict(feature_dict, orient='index')
feature_df.index.name = 'track_uri'

# Reorder columns
columns_order = ['popularity', 'genre', 'danceability', 'energy', 'speechiness', 'instrumentalness', 'liveness', 'loudness', 'tempo', 'valence']
feature_df = feature_df.reindex(columns_order, axis=1)


In [226]:
feature_df.head()

Unnamed: 0_level_0,popularity,genre,danceability,energy,speechiness,instrumentalness,liveness,loudness,tempo,valence
track_uri,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
spotify:track:4zGtsXOEpHDfEIZvbn5wnz,42,[],0.466,0.142,0.106,0.912,"(0.115,)",-17.208,97.448,0.299
spotify:track:5J8oPTjP3pqAG61p2IlksY,27,[],0.276,0.579,0.0282,0.00269,"(0.118,)",-4.918,76.508,0.146
spotify:track:0axZ1pkds89AVTvko3zg7z,42,[],0.319,0.753,0.0457,0.0,"(0.0621,)",-4.745,93.806,0.579
spotify:track:3aIwehAR8RzmP7HEqX5zR8,40,[],0.514,0.00458,0.0545,0.945,"(0.0743,)",-31.894,70.884,0.135
spotify:track:7An9jFpCwjxvSzalvQICBd,47,[],0.661,0.131,0.0685,5e-06,"(0.112,)",-11.632,74.862,0.238


In [200]:
#convert dataframes to csv
#dataframes to import:
## feature_df: list of tracks' audio features
## tracklist_df: list of audio track history
## podcast_df: list of podcast history

# Specify the folder name
folder_name = 'output'

# Create the new folder in the working directory
folder_path = os.path.join(os.getcwd(), folder_name)
os.makedirs(folder_path, exist_ok=True)

# Specify the file names
feature_filename = 'audio_feature.csv'
tracklist_filename = 'tracklist_history.csv'
podcast_filename = 'podcast_history.csv'

# Construct the full file paths
file1_path = os.path.join(folder_path, feature_filename)
file2_path = os.path.join(folder_path, tracklist_filename)
file3_path = os.path.join(folder_path, podcast_filename)

# Convert DataFrames to CSV and save to the specified folder
feature_df.to_csv(file1_path)
tracklist_df.to_csv(file2_path, index=False)
podcast_df.to_csv(file3_path, index=False)