# Update spotify net/assign tracks

> Update the Spotify net by assigning tracks whose playcount is above a threshold to an existing public playlist and removing them from the net.

In [1]:
#| default_exp prep_model

In [2]:
# export
import pandas as pd
import requests
import boto3
import json
from io import BytesIO
import joblib
import pickle
import numpy as np
pd.set_option('display.max_columns', None)

In [3]:
#hide
from nbdev.showdoc import *

In [4]:
class ModelPrep:
    """Load pretrained artifacts and track data from S3 and build model features.

    Intended call order is ``load_s3()`` followed by
    ``transform_features(constant)``; the result lives in ``prepped_frame``.

    Attributes are ``None`` until ``load_s3()`` has run:
        scaler: unpickled object whose ``transform`` is applied to the numeric columns.
        svd: unpickled object stored for later use by callers.
        prepped_frame: merged Spotify / Last.fm track table.
        genre_series, key_series, time_series: label series read from CSV files.
    """

    # Columns removed from the merged table before feature transformation.
    _DROPPED_COLUMNS = (
        'playcount',
        'added at',
        'artist id',
        'id',
        'type',
        'track_href',
        'analysis_url',
        'diff',
    )
    # Columns that receive log(x + constant) before scaling.
    _LOG_COLUMNS = ('speechiness', 'acousticness', 'instrumentalness')
    # Columns passed through the pretrained scaler.
    _SCALED_COLUMNS = (
        'danceability', 'energy', 'loudness',
        'speechiness', 'acousticness', 'instrumentalness', 'liveness',
        'valence', 'tempo', 'duration_ms',
    )

    def __init__(self):
        self.scaler = None
        self.svd = None
        self.prepped_frame = None
        self.genre_series = None
        self.key_series = None
        # Initialised here as well so the attribute exists before load_s3() runs.
        self.time_series = None

    @staticmethod
    def _load_pickle(s3_resource, key):
        """Fetch ``key`` from the spotify-net bucket and unpickle it."""
        body = s3_resource.Object('spotify-net', key).get()['Body'].read()
        # SECURITY NOTE: pickle.loads executes arbitrary code embedded in the
        # payload; this is only acceptable while the bucket contents are trusted.
        return pickle.loads(body)

    @staticmethod
    def _load_series(filename):
        """Read a CSV from the spotify-net bucket and squeeze it to a Series.

        ``read_csv(..., squeeze=True)`` was removed in pandas 2.0;
        ``.squeeze("columns")`` is the documented replacement.
        """
        frame = pd.read_csv('s3://spotify-net/' + filename, index_col=0)
        return frame.squeeze('columns')

    def load_s3(self):
        """Populate every attribute of the instance from the spotify-net bucket.

        Steps:
        1. Load the pickled scaler and SVD objects.
        2. Load the recent Spotify and Last.fm track tables, upper-case their
           ``name`` / ``artist`` columns and inner-merge them on those columns.
        3. Drop the bookkeeping columns listed in ``_DROPPED_COLUMNS``.
        4. Load the genre, key and time-signature label series.

        TODO: consider replacing the separate scaler / SVD objects with a
        single scikit-learn pipeline.
        """
        s3_resource = boto3.resource('s3')

        self.scaler = self._load_pickle(s3_resource, 'scaler')
        self.svd = self._load_pickle(s3_resource, 'svd')

        recent_spotify_tracks = pd.read_csv('s3://spotify-net/newer_tracks.csv', index_col=0)
        recent_lastFM_tracks = pd.read_csv('s3://spotify-net/last_fm_recent_tracks.csv', index_col=0)

        # Normalise case so the merge keys match across both sources. The
        # vectorised .str accessor replaces the deprecated applymap and, unlike
        # a per-cell x.upper(), leaves missing values missing instead of raising.
        for tracks in (recent_spotify_tracks, recent_lastFM_tracks):
            for column in ('name', 'artist'):
                tracks[column] = tracks[column].str.upper()

        merged = pd.merge(recent_spotify_tracks, recent_lastFM_tracks, on=['name', 'artist'])
        self.prepped_frame = merged.drop(columns=list(self._DROPPED_COLUMNS))

        self.genre_series = self._load_series('genres_svd.csv')

        # NOTE: the key and time-signature lists are small and static; they are
        # read from S3 only for consistency with the other artifacts.
        self.key_series = self._load_series('key_list.csv')
        self.time_series = self._load_series('timeSig_list.csv')

    # TODO: decide whether the frame should be passed as an argument instead of
    # being read from (and written back to) the instance.
    def transform_features(self, constant):
        """Log-transform, one-hot encode and scale ``prepped_frame`` in place.

        Args:
            constant: offset added to the ``_LOG_COLUMNS`` before taking the
                natural log.

        Raises:
            RuntimeError: if ``load_s3()`` has not populated the frame and the
                scaler yet.

        TODO: review how the log transform interacts with the scaler applied
        afterwards, and where feature selection should sit relative to both.
        """
        if self.prepped_frame is None or self.scaler is None:
            raise RuntimeError('load_s3() must be called before transform_features()')

        # Tuples are converted to lists because pandas treats a tuple key as a
        # single (multi-level) label rather than a list of columns.
        log_columns = list(self._LOG_COLUMNS)
        scaled_columns = list(self._SCALED_COLUMNS)

        self.prepped_frame[log_columns] = np.log(self.prepped_frame[log_columns] + constant)

        # One-hot encode the two categorical columns.
        self.prepped_frame = pd.get_dummies(
            self.prepped_frame,
            prefix=['key', 'time_signature'],
            columns=['key', 'time_signature'],
        )

        self.prepped_frame[scaled_columns] = self.scaler.transform(self.prepped_frame[scaled_columns])

In [5]:
# Build the prep helper and pull the scaler, SVD, track tables and label series from S3.
m = ModelPrep()
m.load_s3()

In [7]:
# Inspect the raw Spotify track export that ModelPrep.load_s3() merges with the Last.fm table.
pd.read_csv('s3://spotify-net/newer_tracks.csv', index_col=0)

Unnamed: 0,added at,id,name,artist,artist id,playlist id,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,uri,track_href,analysis_url,duration_ms,time_signature,genre_belgian electronic,genre_brostep,genre_complextro,genre_edm,genre_florida rap,genre_house,genre_memphis hip hop,genre_stutter house,genre_vapor twitch,diff
13,2023-07-27 14:39:57+00:00,0Db7JyQoWaIiBsKtSHKi54,Schweigen,The True Spacemen,2shy0RR2ydAT7w05fmYjCh,3ubgXaHeBn1CWLUZPXvqkj,0.715,0.489,4,-7.405,1,0.0383,0.0303,0.46,0.118,0.488,130.993,audio_features,spotify:track:0Db7JyQoWaIiBsKtSHKi54,https://api.spotify.com/v1/tracks/0Db7JyQoWaIi...,https://api.spotify.com/v1/audio-analysis/0Db7...,204275,4,0,0,0,0,0,0,0,0,0,-1 days +09:20:03
12,2023-07-27 14:39:45+00:00,4sRALTDj1vEOe5dPnPg8Qt,Zebra,Jackboy,2S2mt1DiA4QKdKvtqwxrbB,3ubgXaHeBn1CWLUZPXvqkj,0.781,0.777,11,-8.193,0,0.392,0.144,0.0,0.0966,0.645,157.001,audio_features,spotify:track:4sRALTDj1vEOe5dPnPg8Qt,https://api.spotify.com/v1/tracks/4sRALTDj1vEO...,https://api.spotify.com/v1/audio-analysis/4sRA...,110064,4,0,0,0,0,1,0,1,0,0,-1 days +09:20:15
11,2023-07-27 14:39:36+00:00,6tifCCTIVBLC2TmTquYG7G,Fine Day Anthem,Skrillex,5he5w2lnU9x7JFhnwcekXX,3ubgXaHeBn1CWLUZPXvqkj,0.745,0.85,6,-3.903,0,0.075,0.000285,0.0392,0.118,0.54,138.03,audio_features,spotify:track:6tifCCTIVBLC2TmTquYG7G,https://api.spotify.com/v1/tracks/6tifCCTIVBLC...,https://api.spotify.com/v1/audio-analysis/6tif...,188261,4,0,1,1,1,0,0,0,0,0,-1 days +09:20:24
10,2023-07-25 17:24:35+00:00,4fWBsxD5qLDz98zi900VyZ,Matrix,Daktyl,2YrfEkWJvn1wue6JqekigJ,3ubgXaHeBn1CWLUZPXvqkj,0.709,0.539,10,-8.873,0,0.146,0.634,0.476,0.115,0.0964,127.128,audio_features,spotify:track:4fWBsxD5qLDz98zi900VyZ,https://api.spotify.com/v1/tracks/4fWBsxD5qLDz...,https://api.spotify.com/v1/audio-analysis/4fWB...,200419,4,0,0,0,0,0,0,0,0,1,1 days 06:35:25
9,2023-07-23 20:12:50+00:00,2Ol9oSXmACB2dhEArY1Acn,Witness,Sam Silver,1hhCo32XBkw8H0iqTSnkx0,3ubgXaHeBn1CWLUZPXvqkj,0.594,0.424,11,-12.884,1,0.175,0.0993,0.0,0.162,0.27,124.001,audio_features,spotify:track:2Ol9oSXmACB2dhEArY1Acn,https://api.spotify.com/v1/tracks/2Ol9oSXmACB2...,https://api.spotify.com/v1/audio-analysis/2Ol9...,151285,4,0,0,0,0,0,0,0,0,0,3 days 03:47:10
8,2023-07-23 20:12:47+00:00,0tOeHPjaB22chZyHd4NmFP,BLK Magîc,Meechy Darko,5QRlmTynCos8JyojtmgNq6,3ubgXaHeBn1CWLUZPXvqkj,0.496,0.567,7,-7.118,1,0.045,0.772,2.1e-05,0.137,0.363,129.098,audio_features,spotify:track:0tOeHPjaB22chZyHd4NmFP,https://api.spotify.com/v1/tracks/0tOeHPjaB22c...,https://api.spotify.com/v1/audio-analysis/0tOe...,322022,4,0,0,0,0,0,0,0,0,0,3 days 03:47:13
7,2023-07-23 20:12:45+00:00,4PArRqkYWtLrKsLWM03Rpi,Sorry and Sunscreen,Mosley Jr,35esnnk9qnh5dte6Lqrzav,3ubgXaHeBn1CWLUZPXvqkj,0.792,0.466,0,-8.255,0,0.0683,0.653,0.816,0.0948,0.455,123.016,audio_features,spotify:track:4PArRqkYWtLrKsLWM03Rpi,https://api.spotify.com/v1/tracks/4PArRqkYWtLr...,https://api.spotify.com/v1/audio-analysis/4PAr...,210000,4,1,0,0,0,0,0,0,0,0,3 days 03:47:15
6,2023-07-23 20:12:42+00:00,0Lsvj97pplrpK2miMyS0XS,"Think Deep, Don't Sink",Willy P,18jVuRF2PcWQX3BWLUoIIP,3ubgXaHeBn1CWLUZPXvqkj,0.844,0.447,9,-9.91,1,0.0412,0.239,0.657,0.106,0.623,107.993,audio_features,spotify:track:0Lsvj97pplrpK2miMyS0XS,https://api.spotify.com/v1/tracks/0Lsvj97pplrp...,https://api.spotify.com/v1/audio-analysis/0Lsv...,124444,4,0,0,0,0,0,0,0,0,0,3 days 03:47:18
5,2023-07-23 20:12:39+00:00,4qZ52fbjyJF6C4LJ1Tlobw,Like A Heart Won't Beat,Skinny Pelembe,1ZxriNuHn6Upjf3U97nnUN,3ubgXaHeBn1CWLUZPXvqkj,0.559,0.67,2,-8.633,0,0.0441,0.0852,0.00214,0.159,0.313,100.019,audio_features,spotify:track:4qZ52fbjyJF6C4LJ1Tlobw,https://api.spotify.com/v1/tracks/4qZ52fbjyJF6...,https://api.spotify.com/v1/audio-analysis/4qZ5...,146000,4,0,0,0,0,0,0,0,0,0,3 days 03:47:21
4,2023-07-23 20:12:36+00:00,3d8q3uiSK4ppolXtVSxLxY,Gyally From West,Harvey Whyte,0oixhuMlcSB9H0TociqrsI,3ubgXaHeBn1CWLUZPXvqkj,0.957,0.693,8,-7.256,0,0.0926,0.172,0.000103,0.0977,0.692,124.969,audio_features,spotify:track:3d8q3uiSK4ppolXtVSxLxY,https://api.spotify.com/v1/tracks/3d8q3uiSK4pp...,https://api.spotify.com/v1/audio-analysis/3d8q...,184318,4,0,0,0,0,0,0,0,0,0,3 days 03:47:24


In [None]:

# Log-transform, one-hot encode and scale the merged frame; the argument is the
# offset added to the log-transformed columns before taking the log.
m.transform_features(0.0000001)

# Display the genre labels that load_s3() read from genres_svd.csv.
m.genre_series


In [None]:
# Show only the genre indicator columns (names starting with 'genre_') of the prepared frame.
frame = m.prepped_frame
frame.loc[:, frame.columns.str.startswith('genre_')]

In [None]:
# Display the full prepared frame.
frame

In [None]:
# Scratch copy of the genre-expansion step at the start of full_frame():
# build a zero-filled frame with one 'genre_'-prefixed column per known genre,
# overwrite it with the indicator columns present in df_track, then strip the prefix.
# NOTE(review): df_track and gen_series are not defined at notebook top level in
# the visible cells (they are full_frame() parameters) — presumably
# m.prepped_frame and m.genre_series were intended; confirm before running.
curr_gen = df_track.loc[:, df_track.columns.str.startswith('genre_')]
full_gen = pd.DataFrame(np.zeros((len(df_track), len(gen_series.tolist()))) , columns=gen_series.tolist())
full_gen = full_gen.add_prefix('genre_')
full_gen.update(curr_gen)
full_gen.columns = full_gen.columns.str.replace('genre_', '')

In [None]:
# export
if __name__ == '__main__':
    # The free functions this entry point used to call (load_s3, merge_frame,
    # dummies_and_scale) are not defined in this file; ModelPrep performs the
    # same loading, merging and dummy-encoding/scaling steps.
    # NOTE(review): full_frame must already be defined when this guard runs.
    prep = ModelPrep()
    prep.load_s3()
    prep.transform_features(0.0000001)
    full_frame(prep.prepped_frame, prep.genre_series, prep.svd, prep.key_series, prep.time_series)

In [None]:
# export
def _align_to_columns(frame, columns):
    """Return ``frame`` restricted/expanded to ``columns`` as floats, with 0.0 where absent."""
    aligned = frame.reindex(columns=columns, fill_value=0)
    # Missing values inside existing columns count as "not set", i.e. 0.
    return aligned.fillna(0).astype(float)


def full_frame(df_track, gen_series, svd, key_series, time_series,
               output_path='s3://spotify-net/for_prediction.csv'):
    """Expand the indicator columns to their full label sets and upload the result.

    Steps, in order:
    1. Expand the ``genre_*`` columns to every label in ``gen_series`` (prefix
       stripped), pass them through ``svd.transform`` and store the output as
       ``genre_0 ... genre_k``.
    2. Expand the ``key_*`` columns to every label in ``key_series``.
    3. Expand the ``time_signature_*`` columns to every label in ``time_series``.
    4. Write the frame as CSV to ``output_path``.

    All intermediate frames are built on ``df_track.index`` so rows stay
    aligned even when the input does not carry a default 0..n-1 index.

    Args:
        df_track: track feature frame containing ``genre_*``, ``key_*`` and
            ``time_signature_*`` indicator columns.
        gen_series: genre labels without the ``genre_`` prefix.
        svd: object exposing ``transform`` for the full genre indicator matrix.
        key_series: full list of key column names, used as-is.
        time_series: full list of time-signature column names, used as-is.
        output_path: CSV destination; ``None`` skips the write.

    Returns:
        The assembled frame.
    """
    genre_mask = df_track.columns.str.startswith('genre_')
    # Strip only the leading prefix so the names line up with gen_series.
    curr_gen = df_track.loc[:, genre_mask].rename(columns=lambda name: name[len('genre_'):])
    full_gen = _align_to_columns(curr_gen, gen_series.tolist())

    test_trans = pd.DataFrame(svd.transform(full_gen), index=df_track.index)
    test_trans = test_trans.add_prefix('genre_')

    df_track = df_track.loc[:, ~genre_mask]
    df_track = pd.concat([df_track, test_trans], axis=1)

    key_mask = df_track.columns.str.startswith('key_')
    full_key = _align_to_columns(df_track.loc[:, key_mask], key_series.tolist())
    df_track = pd.concat([df_track.loc[:, ~key_mask], full_key], axis=1)

    time_mask = df_track.columns.str.startswith('time_signature_')
    full_time = _align_to_columns(df_track.loc[:, time_mask], time_series.tolist())
    df_track = pd.concat([df_track.loc[:, ~time_mask], full_time], axis=1)

    print(df_track.shape)
    if output_path is not None:
        df_track.to_csv(output_path)
        print('Uploaded to S3')

    return df_track
