## Import All Required Libraries

In [1]:
# Import all connection libraries

from spotipy import SpotifyClientCredentials
import spotipy
import sqlalchemy as sql

# Import all data manipulation libraries

import pandas as pd
from pandarallel import pandarallel
from tqdm import tqdm
pandarallel.initialize(verbose=0, nb_workers=8)

In [2]:
from functools import lru_cache

# Use the lru_cache decorator to cache the result of the function
# maxsize=1000 means the cache will store the result of up to 1000 items
@lru_cache(maxsize=1000)
def get_playlist_tracks(playlist_uri):
    """
    This function uses the Spotify API to get the tracks of a given playlist.
    The function is decorated with the lru_cache decorator to cache the result for each unique playlist_uri.
    So, if the same playlist_uri is passed to the function again, the cached result will be returned
    instead of making a new API call.
    :param playlist_uri: The Spotify URI of the playlist
    :return: A DataFrame containing the tracks of the playlist
    """
    # Use the Spotify API to get the tracks of the playlist
    tracks = pd.json_normalize(sp.playlist_tracks(playlist_uri), record_path=['items'])
    return tracks

## Establish External Connections

In [3]:
# establish connection to Spotify API

cid = 'e5448a8a4fdc4b5d98b44e956d50546d'
secret = '8924c0394d3f49a4a569fc03e891aa1b'
client_credentials = SpotifyClientCredentials(client_id=cid, client_secret=secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials, requests_timeout=15, retries=10)

# establish connection to Postgres

engine = sql.create_engine('postgresql+psycopg2://postgres:DataNerd2023!!\
@localhost/spotify')

## Load Initial Training Data

In [4]:
# load training data
new_batch = pd.read_csv('C:\\Users\\Chase\\OneDrive\\Documents\\UVU-2022-2023\\distinct_playlists_new.csv')[['playlist_uri', 'playlist_name']]
new_batch['playlist_uri'] = new_batch['playlist_uri'].str.strip()

db_query = pd.read_sql('SELECT DISTINCT playlist_uri FROM playlist_tracks', engine)

outer = new_batch.merge(db_query, how='outer', indicator=True)
anti_join = outer[(outer._merge=='left_only')].drop('_merge', axis=1)

new_batch = pd.DataFrame(anti_join)
new_batch.head(10)

Unnamed: 0,playlist_uri,playlist_name
25303,spotify:playlist:3dZtMbtwYDyZKsF6ff6LmI,New Flavor
25311,spotify:playlist:3XNR5Xh8RCxO4EAOA9atC1,High Fashion
25313,spotify:playlist:37i9dQZF1DX0jNl6iUx9qC,FINAL FANTASY -relax-
25455,spotify:playlist:1M35HrKYW1dSAkeExKz5o5,Final Fantasy LoFi Mix
25456,spotify:playlist:37i9dQZF1DX11VZ9hcBiq6,Traditional Scottish Folk
25457,spotify:playlist:5vqoxc95T2Q1TfFaJwoVWc,TAGGA TILL FÖR FAAN🤪🚀
25458,spotify:playlist:7Mszg2SSclbM2lkr1iR5jx,Feeling songs Kannada
25459,spotify:playlist:75VfCRPrqQqvtFxPNTgafd,fuck fake friends.
25460,spotify:playlist:5RprNn6f2p0PsjrAdvaexJ,Frankie Ruiz exitos
25461,spotify:playlist:3Bpys5SxSxiuHoZoolwRvA,Förfest 2023 🥂 förfesta & känn peppen


## Iterate Through Training Data to Change Grain of Data

In [5]:
def load_all_data():
        # load training data
        new_batch = pd.read_csv('C:\\Users\\Chase\\OneDrive\\Documents\\UVU-2022-2023\\distinct_playlists_new.csv')[['playlist_uri', 'playlist_name']]
        new_batch['playlist_uri'] = new_batch['playlist_uri'].str.strip()

        db_query = pd.read_sql('SELECT DISTINCT playlist_uri FROM playlist_tracks', engine)

        outer = new_batch.merge(db_query, how='outer', indicator=True)
        anti_join = outer[(outer._merge=='left_only')].drop('_merge', axis=1)

        new_batch = pd.DataFrame(anti_join)
        new_batch = new_batch[0:18]
        
        # extract all tracks in playlists
        load_batch = []
        series = new_batch['playlist_uri'].to_dict()
        for playlist in tqdm(series.values()):
                try:
                        tracks = get_playlist_tracks(playlist)
                        if len(tracks) >= 100:
                                tracks2 = pd.json_normalize(sp.playlist_tracks(playlist, offset=100), record_path=['items'])
                                tracks3 = pd.json_normalize(sp.playlist_tracks(playlist, offset=200), record_path=['items'])
                                tracks = pd.concat([tracks, tracks2, tracks3])
                                secondary_data = pd.json_normalize(sp.playlist(playlist))
                                tracks['playlist_uri'] = playlist
                                tracks = tracks.merge(secondary_data, left_on='playlist_uri', right_on='uri')
                                tracks = tracks.rename(columns={"name":"playlist_name", "track.name":"track_name", "track.uri":"track_uri", "track.album.name":"album_name", "track.explicit":"isExplicit", "track.album.release_date":"release_date", "track.duration_ms":"duration_ms", "track.album.uri":"album_uri", "added_by.external_urls.spotify": "added_by_external_urls_spotify", 'added_by.href':"added_by_href", "added_by.id":"added_by_id", "added_by.type":"added_by_type", "added_by.uri":"added_by_uri", "track.album.album_type":"track_album_album_type", "track.album.external_urls.spotify":"track_album_external_urls_spotify", "track.album.href":"track_album_href", "track.album.id":"track_album_id", "track.album.release_date_precision":"track_album_release_date_precision", "track.album.total_tracks":"track_album_total_tracks","track.album.type":"track_album_type", "track.disc_number":"track_disc_number", "track.episode":"track_episode", "track.external_ids.isrc":"track_external_ids_isrc", "track.external_urls.spotify":"track_external_ids_spotify", "track.href":"track_href", "track.id":"track_id", "track.is_local":"track_is_local", "track.popularity":"track_popularity", "track.preview_url":"track_preview_url", "track.track":"track_track", "track.track_number":"track_track_number", "track.type":"track_type", "video_thumbnail.url":"video_thumbnail_url", "external_urls.spotify":"external_urls_spotify", "followers.href":"followers_href", "followers.total":"followers_total", "owner.display_name":"owner_display_name", "owner.external_urls.spotify":"owner_external_urls_spotify", "owner.href":"owner_href", "owner.id":"owner_id", "owner.type":"owner_type", "owner.uri":"owner_uri", "tracks.href":"tracks_href", "tracks.limit":"tracks_limit", "tracks.next":"tracks_next", "tracks.offset":"tracks_offset", "tracks.previous":"tracks_previous", "tracks.total":"tracks_total"})
                                tracks = tracks.drop(columns=['track.album.artists', 'track.album.available_markets', 'track.album.images', 'track.artists', 'track.available_markets', 'images', 'tracks.items'])

                                load_batch.append(tracks)
                        else:
                                secondary_data = pd.json_normalize(sp.playlist(playlist))
                                tracks['playlist_uri'] = playlist
                                tracks = tracks.merge(secondary_data, left_on='playlist_uri', right_on='uri')
                                tracks = tracks.rename(columns={"name":"playlist_name", "track.name":"track_name", "track.uri":"track_uri", "track.album.name":"album_name", "track.explicit":"isExplicit", "track.album.release_date":"release_date", "track.duration_ms":"duration_ms", "track.album.uri":"album_uri", "added_by.external_urls.spotify": "added_by_external_urls_spotify", 'added_by.href':"added_by_href", "added_by.id":"added_by_id", "added_by.type":"added_by_type", "added_by.uri":"added_by_uri", "track.album.album_type":"track_album_album_type", "track.album.external_urls.spotify":"track_album_external_urls_spotify", "track.album.href":"track_album_href", "track.album.id":"track_album_id", "track.album.release_date_precision":"track_album_release_date_precision", "track.album.total_tracks":"track_album_total_tracks","track.album.type":"track_album_type", "track.disc_number":"track_disc_number", "track.episode":"track_episode", "track.external_ids.isrc":"track_external_ids_isrc", "track.external_urls.spotify":"track_external_ids_spotify", "track.href":"track_href", "track.id":"track_id", "track.is_local":"track_is_local", "track.popularity":"track_popularity", "track.preview_url":"track_preview_url", "track.track":"track_track", "track.track_number":"track_track_number", "track.type":"track_type", "video_thumbnail.url":"video_thumbnail_url", "external_urls.spotify":"external_urls_spotify", "followers.href":"followers_href", "followers.total":"followers_total", "owner.display_name":"owner_display_name", "owner.external_urls.spotify":"owner_external_urls_spotify", "owner.href":"owner_href", "owner.id":"owner_id", "owner.type":"owner_type", "owner.uri":"owner_uri", "tracks.href":"tracks_href", "tracks.limit":"tracks_limit", "tracks.next":"tracks_next", "tracks.offset":"tracks_offset", "tracks.previous":"tracks_previous", "tracks.total":"tracks_total"})
                                tracks = tracks.drop(columns=['track.album.artists', 'track.album.available_markets', 'track.album.images', 'track.artists', 'track.available_markets', 'images', 'tracks.items'])
                                load_batch.append(tracks)
                except:
                        pass
        load_batch = pd.concat(load_batch)
        if set(['track.album.is_playable','track.album.restrictions.reason', 'track.external_ids.spotify', 'track.album.album_group']).issubset(load_batch.columns):
                load_batch = load_batch.drop(columns=['track.album.is_playable', 'track.album.restrictions.reason', 'track.external_ids.spotify', 'track.is_playable', 'track.album.album_group'])
        else:
                load_batch
        load_batch.to_sql('playlist_tracks', engine, if_exists='append')
        print(f'playlist_tracks updated #{i}')
        
        # extract all artists
        df = pd.read_sql('''SELECT DISTINCT track_uri FROM artists''', engine)
        outer = load_batch.merge(df, how='outer', indicator=True)
        anti_join = outer[(outer._merge=='left_only')].drop('_merge', axis=1)

        df = pd.Series(anti_join['track_uri'])
        base_list = []
        for track in tqdm(df):
                try:
                        df2 = pd.json_normalize(sp.track(track), record_path=['artists'])
                        df2['track_uri'] = track
                        df2 = df2[['name', 'track_uri']]
                        base_list.append(df2)
                except:
                        pass
        df2 = pd.concat(base_list)
        
        df2['RN'] = df2.groupby("track_uri")["name"].rank(method="first", ascending=True)
        df2 = df2.pivot(index='track_uri', columns=['RN'], values='name').reset_index()
        df2 = df2.rename(columns={1.0:'artist1', 2.0:'artist2', 3.0:'artist3', 4.0:'artist4', 5.0:'artist5'})
        df2 = df2[['track_uri', 'artist1', 'artist2', 'artist3', 'artist4', 'artist5']]
        
        df2.to_sql('artists', engine, if_exists='append')
        print(f'artists updated #{i}')
        
        # extract all audio_features
        df = pd.read_sql('''SELECT DISTINCT track_uri FROM audio_features''', engine)
        outer = load_batch.merge(df, how='outer', indicator=True)
        anti_join = outer[(outer._merge=='left_only')].drop('_merge', axis=1)

        df = pd.Series(anti_join['track_uri'])

        base_list = []
        for track in tqdm(df):
                try:
                        df3 = pd.json_normalize(sp.audio_features(track))
                        df3['track_uri'] = track
                        df3 = df3[['track_uri', 'danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature']]
                        base_list.append(df3)
                except:
                        pass
        df3 = pd.concat(base_list)
        
        df3.to_sql('audio_features', con=engine, if_exists='append')
        print(f'audio_features updated #{i}')

In [6]:
i = 1

while i <= 8:
    load_all_data()
    i += 1

 11%|█         | 2/18 [00:00<00:05,  2.86it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:15<00:00,  1.16it/s]


playlist_tracks updated #1


100%|██████████| 524/524 [00:56<00:00,  9.27it/s]


artists updated #1


100%|██████████| 524/524 [00:45<00:00, 11.56it/s]


audio_features updated #1


 11%|█         | 2/18 [00:00<00:01,  8.53it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:13<00:00,  1.37it/s]


playlist_tracks updated #2


100%|██████████| 700/700 [01:16<00:00,  9.18it/s]


artists updated #2


100%|██████████| 700/700 [00:55<00:00, 12.63it/s]


audio_features updated #2


 11%|█         | 2/18 [00:00<00:02,  6.98it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:15<00:00,  1.15it/s]


playlist_tracks updated #3


  1%|          | 4/576 [00:00<01:46,  5.40it/s]Expected id of type track but found type episode spotify:episode:5xvcT60J0eoVXttoDh9hOn
HTTP Error for GET to https://api.spotify.com/v1/tracks/5xvcT60J0eoVXttoDh9hOn with Params: {'market': None} returned 404 due to Non existing id: 'spotify:track:5xvcT60J0eoVXttoDh9hOn'
  1%|          | 6/576 [00:01<01:31,  6.22it/s]Expected id of type track but found type episode spotify:episode:1NxaVvqibwKieA576RF9v3
HTTP Error for GET to https://api.spotify.com/v1/tracks/1NxaVvqibwKieA576RF9v3 with Params: {'market': None} returned 404 due to Non existing id: 'spotify:track:1NxaVvqibwKieA576RF9v3'
  1%|          | 7/576 [00:01<01:32,  6.14it/s]Expected id of type track but found type episode spotify:episode:0H8EqR2SzyfC5nOeYi2AMa
HTTP Error for GET to https://api.spotify.com/v1/tracks/0H8EqR2SzyfC5nOeYi2AMa with Params: {'market': None} returned 404 due to Non existing id: 'spotify:track:0H8EqR2SzyfC5nOeYi2AMa'
  1%|▏         | 8/576 [00:01<01:21,  6.

artists updated #3


  1%|          | 4/576 [00:00<00:51, 11.10it/s]Expected id of type track but found type episode spotify:episode:5xvcT60J0eoVXttoDh9hOn
  1%|          | 6/576 [00:00<00:47, 11.95it/s]Expected id of type track but found type episode spotify:episode:1NxaVvqibwKieA576RF9v3
Expected id of type track but found type episode spotify:episode:0H8EqR2SzyfC5nOeYi2AMa
  1%|▏         | 8/576 [00:00<00:43, 12.95it/s]Expected id of type track but found type episode spotify:episode:0vjh0ByqByJ3lOQKD3CwXj
Expected id of type track but found type episode spotify:episode:3MPTdDzCRi1hGIiDGqqO6C
  2%|▏         | 10/576 [00:00<00:41, 13.48it/s]Expected id of type track but found type episode spotify:episode:178hThCmkecg9Zuoj1GJWu
Expected id of type track but found type episode spotify:episode:23gLbTEXmhluZNYMP4ASb0
  6%|▌         | 34/576 [00:02<00:35, 15.21it/s]Expected id of type track but found type episode spotify:episode:1eyY8771G9pLhyQM7dt1ct
  7%|▋         | 40/576 [00:02<00:35, 15.17it/s]Expected id

audio_features updated #3


 11%|█         | 2/18 [00:00<00:02,  6.82it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
 67%|██████▋   | 12/18 [00:11<00:08,  1.35s/it]HTTP Error for GET to https://api.spotify.com/v1/playlists/6YYMc3ghgOHVE8aC2N2vMw/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:16<00:00,  1.11it/s]


playlist_tracks updated #4


100%|██████████| 987/987 [01:43<00:00,  9.51it/s]


artists updated #4


100%|██████████| 988/988 [01:20<00:00, 12.21it/s]


audio_features updated #4


 11%|█         | 2/18 [00:00<00:02,  7.30it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
HTTP Error for GET to https://api.spotify.com/v1/playlists/6YYMc3ghgOHVE8aC2N2vMw/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:15<00:00,  1.15it/s]


playlist_tracks updated #5


100%|██████████| 822/822 [01:31<00:00,  8.96it/s]


artists updated #5


100%|██████████| 822/822 [01:02<00:00, 13.05it/s]


audio_features updated #5


  6%|▌         | 1/18 [00:00<00:03,  4.30it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
 11%|█         | 2/18 [00:00<00:03,  5.09it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/6YYMc3ghgOHVE8aC2N2vMw/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:12<00:00,  1.47it/s]


playlist_tracks updated #6


100%|██████████| 569/569 [00:56<00:00, 10.05it/s]


artists updated #6


100%|██████████| 570/570 [00:52<00:00, 10.94it/s]


audio_features updated #6


  0%|          | 0/18 [00:00<?, ?it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
 11%|█         | 2/18 [00:00<00:02,  5.70it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/6YYMc3ghgOHVE8aC2N2vMw/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:17<00:00,  1.01it/s]


playlist_tracks updated #7


 24%|██▎       | 196/829 [00:20<01:00, 10.40it/s]Expected id of type track but found type episode spotify:episode:1cGMaqvEzDEWAD7ZMIxWFQ
HTTP Error for GET to https://api.spotify.com/v1/tracks/1cGMaqvEzDEWAD7ZMIxWFQ with Params: {'market': None} returned 404 due to Non existing id: 'spotify:track:1cGMaqvEzDEWAD7ZMIxWFQ'
 38%|███▊      | 318/829 [00:32<00:47, 10.86it/s]Expected id of type track but found type episode spotify:episode:3zCR9HGr0GIQnZhRIxBUvK
HTTP Error for GET to https://api.spotify.com/v1/tracks/3zCR9HGr0GIQnZhRIxBUvK with Params: {'market': None} returned 404 due to Non existing id: 'spotify:track:3zCR9HGr0GIQnZhRIxBUvK'
100%|██████████| 829/829 [01:26<00:00,  9.55it/s]


artists updated #7


 24%|██▍       | 198/831 [00:18<00:40, 15.52it/s]Expected id of type track but found type episode spotify:episode:1cGMaqvEzDEWAD7ZMIxWFQ
 39%|███▊      | 320/831 [00:26<00:32, 15.73it/s]Expected id of type track but found type episode spotify:episode:3zCR9HGr0GIQnZhRIxBUvK
100%|██████████| 831/831 [01:14<00:00, 11.10it/s]


audio_features updated #7


  6%|▌         | 1/18 [00:00<00:01,  9.87it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DX0jNl6iUx9qC/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
 11%|█         | 2/18 [00:00<00:01,  8.90it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/6YYMc3ghgOHVE8aC2N2vMw/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
 50%|█████     | 9/18 [00:04<00:06,  1.48it/s]HTTP Error for GET to https://api.spotify.com/v1/playlists/37i9dQZF1DXcyPBAlQvdpJ/tracks with Params: {'limit': 100, 'offset': 0, 'fields': None, 'market': None, 'additional_types': 'track'} returned 404 due to Not found.
100%|██████████| 18/18 [00:14<00:00,  1.26it/s]


playlist_tracks updated #8


100%|██████████| 1151/1151 [01:55<00:00, 10.00it/s]


artists updated #8


100%|██████████| 1152/1152 [01:36<00:00, 11.92it/s]


audio_features updated #8


In [7]:
all_data1 = pd.read_sql('''SELECT DISTINCT playlist_uri, playlist_name, owner_uri, owner_display_name, collaborative, description, followers_total,
af.track_uri, track_name, artist1, artist2, artist3, artist4, artist5, album_uri, album_name, release_date, 
added_by_uri, added_at, track_album_total_tracks, af.duration_ms, "isExplicit", track_popularity, 
track_preview_url, video_thumbnail_url, danceability, energy, loudness, key, mode, acousticness, speechiness, 
instrumentalness, liveness, valence, tempo, time_signature
FROM playlist_tracks pt
JOIN artists a ON pt.track_uri = a.track_uri
JOIN audio_features af ON pt.track_uri = af.track_uri
ORDER BY playlist_uri, track_uri
LIMIT 1894425;''', engine)
f = open("C:\\Users\\Chase\\OneDrive\\Documents\\Career Development\\Data Science Club\\Spring 2023 Club Project\\oltp_output1.csv", "w")
f.truncate()
f.close()

all_data1.to_csv('C:\\Users\\Chase\\OneDrive\\Documents\\Career Development\\Data Science Club\\Spring 2023 Club Project\\oltp_output1.csv', mode="w+", index=False)

In [8]:
all_data2 = pd.read_sql('''SELECT DISTINCT playlist_uri, playlist_name, owner_uri, owner_display_name, collaborative, description, followers_total,
af.track_uri, track_name, artist1, artist2, artist3, artist4, artist5, album_uri, album_name, release_date, 
added_by_uri, added_at, track_album_total_tracks, af.duration_ms, "isExplicit", track_popularity, 
track_preview_url, video_thumbnail_url, danceability, energy, loudness, key, mode, acousticness, speechiness, 
instrumentalness, liveness, valence, tempo, time_signature
FROM playlist_tracks pt
JOIN artists a ON pt.track_uri = a.track_uri
JOIN audio_features af ON pt.track_uri = af.track_uri
ORDER BY playlist_uri, track_uri
OFFSET 1894425;''', engine)
f = open("C:\\Users\\Chase\\OneDrive\\Documents\\Career Development\\Data Science Club\\Spring 2023 Club Project\\oltp_output2.csv", "w")
f.truncate()
f.close()

all_data2.to_csv('C:\\Users\\Chase\\OneDrive\\Documents\\Career Development\\Data Science Club\\Spring 2023 Club Project\\oltp_output2.csv', mode="w+", index=False)

In [9]:
features1 = all_data1[['track_uri', 'track_name', 'artist1', 'artist2', 'artist3', 'artist4', 'artist5', 'album_name', 'release_date', 'danceability', 'energy', 'track_popularity', 'acousticness', 'valence', 'tempo']]
features2 = all_data2[['track_uri', 'track_name', 'artist1', 'artist2', 'artist3', 'artist4', 'artist5', 'album_name', 'release_date', 'danceability', 'energy', 'track_popularity', 'acousticness', 'valence', 'tempo']]
features = pd.concat([features1, features2])
features = features.drop_duplicates(subset='track_uri')
features.to_parquet('C:\\Users\\Chase\\OneDrive\\Documents\\UVU-2022-2023\\features.parquet.gzip', compression='gzip')