### Spotify EDA
#### Load Artists


Get Artist details from Spotify for every artist in the database.
Create a new table called "Artists" where each record is an artist.  
Primary key is the Spotify Artist URI.

In [39]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import os
import sys
import json
import seaborn as sns

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Table, Column, Integer, String, Float, MetaData
from sqlalchemy import create_engine
from sqlalchemy import and_
import sqlite3
from sqlalchemy.orm import sessionmaker

sys.path.append('../')
from spotify_api import get_spotify_data
from spotify_database import get_session, display_time

#TEMP
from spotify_api import parse_spotify_url, token, TokenAuth
import requests

from urllib.parse import urlencode

%matplotlib inline

In [40]:
# !pip install ipywidgets 
!jupyter nbextension enable --py widgetsnbextension
# !jupyter labextension install @jupyter-widgets/jupyterlab-manager

# %%capture
from tqdm import tqdm_notebook as tqdm
tqdm().pandas()

Enabling notebook extension jupyter-js-widgets/extension...
      - Validating: [32mOK[0m


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




### Establish DataBase Connection

In [41]:
db_path = '../data/spotify_songs.db'

# Get sesion
session = get_session(db_path)
engine = create_engine('sqlite:///' + db_path)

# Get Classes
Playlists = getattr(get_session, "Playlists")
Tracks = getattr(get_session, "Tracks")
Artists = getattr(get_session, "Artists")

### Establish Spotify API Connection

In [42]:
# Get URI for Artist, Track and Album for a song
rv = display_time(session.query(Playlists.track_name, 
                                Playlists.track_uri,
                                Playlists.artist_uri,
                                Playlists.album_uri).filter(Playlists.track_name=="Who Says").distinct().first)
rv

Time to Execute: 0.01 seconds


('Who Says',
 'spotify:track:0HLWvLKQWpFdPhgk6ym58n',
 'spotify:artist:0hEurMDQu99nJRq8pTxO14',
 'spotify:album:1V5vQRMWTNGmqwxY8jMVou')

In [43]:
get_spotify_data(db_uri=rv.artist_uri, key='genres')

['neo mellow', 'pop', 'pop rock', 'singer-songwriter']

In [6]:
# Create DB and Table if they don't exist


connection = engine.connect() #creates db if it doesn't exist
meta = MetaData()

# songs = Table(
#                 'songs', meta,
#                 Column('id', Integer, primary_key = True),
#                 Column('playlist_id', Integer),
#                 Column('pos', Integer), 
#                 Column('artist_name', String), 
#                 Column('track_uri', String),
#                 Column('artist_uri', String), 
#                 Column('track_name', String),
#                 Column('album_uri', String), 
#                 Column('duration_ms', String), 
#                 Column('album_name', String)
# )

tracks = Table(
                'tracks', meta,
                Column('track_uri', String, primary_key = True),
                Column('artist_uri', String),
                Column('danceability', Float),
                Column('energy', Float),
                Column('key', Integer),
                Column('loudness', Float),
                Column('mode', Integer),
                Column('speechiness', Float),
                Column('acousticness', Float),
                Column('instrumentalness', Float),
                Column('liveness', Float),
                Column('valence', Float),
                Column('tempo', Float),
                Column('duration_ms', Integer),
                Column('time_signature', Integer)
)

# artists = Table(
#                 'artists', meta,
#                 Column('artist_uri', String, primary_key = True),
#                 Column('followers', Integer),
#                 Column('genres', String),
#                 Column('artist_name', String),
#                 Column('artist_popularity', Integer)
# )

meta.create_all(engine)

In [17]:
# artists.drop(engine)

In [6]:
def build_artist_string(uri_list:list) -> str:
    rv_str = ''
    
    for u in uri_list:
        t_type, t_id = parse_spotify_url(u)
        if t_type != "artist":
            print("ERR: Only 'artist'-type uri's have features: {}".format(u))
            return False
        
        if rv_str == '':
            rv_str += t_id
        else:
            rv_str += ',' + t_id
            
    return rv_str

In [7]:
# get artists
from time import sleep
def get_artists(artist_uri_list:list) -> list:
    
    r_dict = {'ids': build_artist_string(artist_uri_list)}
        
    # build spotify url from db uri
    spotify_url = "https://api.spotify.com/v1/artists/"

    token()  # ensure token is valid
    response = requests.get(spotify_url, 
                            auth=TokenAuth('Bearer ' + token.token),
                            params=r_dict)
    
    if response.status_code == 429: # too many requests
        retry_value = response.headers.get('Retry-After')
        print("API limit.  Waiting {} seconds".format(retry_value))
        for s in tqdm(range(retry_value+1)):
            sleep(1)
            
        # resend request
        token()  # ensure token is valid
        response = requests.get(spotify_url,
                                auth=TokenAuth('Bearer ' + token.token),
                                params=r_dict)
                
    if response.status_code != 200:
        print("ERR: Unable to access Spotify data:  ERR:{}".format(response.status_code, url))
        return False
    
    return response.json().get('artists')


In [8]:
# get artists
from time import sleep
def get_artist(artist_uri:str) -> list:
    
    a_type, a_id = parse_spotify_url(artist_uri)
    
    if a_type!="artist":
        print("URI is not an artist URI: {}".format(artist_uri))
        return False
        
    # build spotify url from db uri
    spotify_url = "https://api.spotify.com/v1/artists/" + a_id

    token()  # ensure token is valid
    response = requests.get(spotify_url, 
                            auth=TokenAuth('Bearer ' + token.token))
    
    if response.status_code == 429: # too many requests
        retry_value = response.headers.get('Retry-After')
        print("API limit.  Waiting {} seconds".format(retry_value))
        for s in tqdm(range(retry_value+1)):
            sleep(1)
            
        # resend request
        token()  # ensure token is valid
        response = requests.get(spotify_url,
                                auth=TokenAuth('Bearer ' + token.token))
                
    if response.status_code != 200:
        print("ERR: Unable to access Spotify data:  ERR:{}".format(response.status_code, url))
        return False
    
    return response.json()


In [9]:
def load_artists(artists):
    Session = sessionmaker(bind = engine)
    session = Session()
    
    for a in artists:

        session.add(Artists( artist_uri          = a.get('uri'),
                             followers           = a.get('followers').get('total'),
                             genres              = ",".join(a.get('genres')),
                             artist_name         = a.get('name'),
                             artist_popularity   = a.get('popularity')
                           )
                   )

    try:
        session.commit()
    except Exception as e:
        session.rollback()
        
        # commit individually
        for i, a in enumerate(artists):
            session.add(Artists( artist_uri          = a.get('uri'),
                                 followers           = a.get('followers').get('total'),
                                 genres              = ",".join(a.get('genres')),
                                 artist_name         = a.get('name'),
                                 artist_popularity   = a.get('popularity')
                               )
                       )
            try:
                session.commit()
            except Exception as e:
                print("DUPLICATE {}: {} - {}".format(i, a.get('uri'), a.get('name') ))
                session.rollback()
        
    return len(artists)
    

In [10]:
def load_all(artists_list, blk_size:int=50):
    """
    Loads all artists db.
    Spotify only allows 50 tracks to be queried at once.
    """

    length = len(artists_list)
    if length==0:
        print("No records to load")
        return False
    
    start = 0
    for end in tqdm(range(blk_size,length+blk_size,blk_size)):
        if end>length:
            end=length

        # do query
        a = get_artists(artists_list[start:end])
        start = end

        # do load
        load_artists(a)
            

### Load Artists

First see what may already be loaded and only load items in the Songs DB that aren't already in the Artists DB

In [12]:

# get all unique tracks from playlists
all_artists = display_time(session.query(Playlists.artist_uri).distinct().all)
all_artists = [a.artist_uri for a in all_artists]

# make sure we aren't loading something already loaded
loaded_artists = display_time(session.query(Artists.artist_uri).all)
loaded_artists = [a.artist_uri for a in loaded_artists]

missing_artists = list(set(all_artists) - set(loaded_artists))

len(missing_artists)

Time to Execute: 111.91 seconds
Time to Execute: 8.76 seconds


0

In [188]:
# WARNING!!!  Loads all values into DB
# Initial load will take several hours - 30 min est 
if len(missing_artists) > 0:
    load_all(missing_artists)

HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

DUPLICATE 0: spotify:artist:6l6NgkV6IWYKlRN7jHCQmt - Stanley Myers



### DB Corrections
Some artists have new spotify ID numbers.  When you query the ID from the main playlist table, you get back a new ID number.

This results in having Artist URI's in the playlist table that are not in the Artist table.

To correct this, the playlist table is updated for the new uri.

In [189]:
# get the mismatch items
# routine will go through the list of
# mismatches and look each one up in the playlist table.  
# Then, the playlist URI will be retrieved.  If the artist name is the same,
# then the new URI from Spotify will be updated in the playlist table.

In [227]:
suspect_list = missing_artists

In [230]:
# Go through all suspect records.
# Get spotify details for the record's artist
# Make sure that the names are the same - if not, no update
# If names match - update the db_entry in the table and commit
updates = 0
for suspect in tqdm(suspect_list):
    
    db_artist_entries = display_time(session.query(Songs).filter(Songs.artist_uri==suspect).all)
    
    for db_entry in db_artist_entries:
        spotify_artist = get_artist(db_entry.artist_uri)
        spotify_artist_name = spotify_artist.get('name')
        spotify_artist_uri = spotify_artist.get('uri')

        if spotify_artist.get('uri') != db_entry.artist_uri:
            db_entry.artist_uri = spotify_artist_uri
            session.commit()
            updates += 1
        else:
            print("URIs matched: {} - {}".format(spotify_artist_uri, spotify_artist_name))

updates

HBox(children=(IntProgress(value=0, max=1994), HTML(value='')))

Time to Execute: 44.2 seconds
Time to Execute: 40.69 seconds
Time to Execute: 41.98 seconds
Time to Execute: 41.5 seconds
Time to Execute: 39.69 seconds
Time to Execute: 40.53 seconds
Time to Execute: 40.0 seconds
Time to Execute: 38.49 seconds
Time to Execute: 42.27 seconds
Time to Execute: 39.58 seconds
Time to Execute: 40.39 seconds
Time to Execute: 39.44 seconds
Time to Execute: 38.98 seconds
Time to Execute: 38.93 seconds
Time to Execute: 41.39 seconds
Time to Execute: 39.13 seconds
Time to Execute: 40.12 seconds
Time to Execute: 39.91 seconds
Time to Execute: 41.01 seconds
Time to Execute: 40.99 seconds
Time to Execute: 44.73 seconds
Time to Execute: 50.87 seconds
Time to Execute: 52.98 seconds
Time to Execute: 48.56 seconds
Time to Execute: 52.15 seconds
Time to Execute: 55.08 seconds
Time to Execute: 50.33 seconds
Time to Execute: 53.1 seconds
Time to Execute: 51.8 seconds
Time to Execute: 51.83 seconds
Time to Execute: 54.49 seconds
token():INFO:   Token refreshed
Time to Exec

Time to Execute: 48.69 seconds
Time to Execute: 47.32 seconds
Time to Execute: 45.44 seconds
Time to Execute: 50.11 seconds
Time to Execute: 48.53 seconds
Time to Execute: 43.96 seconds
Time to Execute: 49.71 seconds
Time to Execute: 49.74 seconds
Time to Execute: 48.02 seconds
Time to Execute: 53.21 seconds
Time to Execute: 45.46 seconds
Time to Execute: 50.05 seconds
Time to Execute: 53.37 seconds
Time to Execute: 44.97 seconds
Time to Execute: 50.96 seconds
Time to Execute: 46.74 seconds
Time to Execute: 44.55 seconds
Time to Execute: 48.11 seconds
Time to Execute: 45.99 seconds
Time to Execute: 51.48 seconds
Time to Execute: 47.53 seconds
Time to Execute: 47.77 seconds
Time to Execute: 51.8 seconds
Time to Execute: 47.02 seconds
Time to Execute: 44.03 seconds
Time to Execute: 50.32 seconds
Time to Execute: 47.75 seconds
Time to Execute: 53.72 seconds
Time to Execute: 56.45 seconds
Time to Execute: 49.99 seconds
Time to Execute: 49.99 seconds
Time to Execute: 54.23 seconds
Time to E

Time to Execute: 6.9 seconds
Time to Execute: 6.69 seconds
Time to Execute: 6.79 seconds
Time to Execute: 6.84 seconds
Time to Execute: 7.59 seconds
Time to Execute: 6.92 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.69 seconds
Time to Execute: 6.82 seconds
Time to Execute: 6.84 seconds
Time to Execute: 6.94 seconds
Time to Execute: 6.8 seconds
Time to Execute: 6.81 seconds
Time to Execute: 6.87 seconds
Time to Execute: 6.87 seconds
Time to Execute: 6.78 seconds
Time to Execute: 6.74 seconds
Time to Execute: 6.77 seconds
Time to Execute: 6.77 seconds
Time to Execute: 6.83 seconds
Time to Execute: 6.93 seconds
Time to Execute: 6.76 seconds
Time to Execute: 6.75 seconds
Time to Execute: 6.8 seconds
Time to Execute: 6.76 seconds
Time to Execute: 6.73 seconds
Time to Execute: 6.99 seconds
Time to Execute: 6.8 seconds
Time to Execute: 6.87 seconds
Time to Execute: 6.83 seconds
Time to Execute: 6.68 seconds
Time to Execute: 6.75 seconds
Time to Execute: 6.73 seconds
Time to Execut

Time to Execute: 6.82 seconds
Time to Execute: 6.93 seconds
Time to Execute: 6.98 seconds
Time to Execute: 6.79 seconds
Time to Execute: 6.9 seconds
Time to Execute: 6.86 seconds
Time to Execute: 6.92 seconds
Time to Execute: 7.0 seconds
Time to Execute: 6.89 seconds
Time to Execute: 6.98 seconds
Time to Execute: 7.0 seconds
Time to Execute: 6.81 seconds
Time to Execute: 6.82 seconds
Time to Execute: 6.84 seconds
Time to Execute: 6.89 seconds
Time to Execute: 6.79 seconds
Time to Execute: 6.8 seconds
Time to Execute: 6.87 seconds
Time to Execute: 6.88 seconds
Time to Execute: 6.92 seconds
Time to Execute: 6.89 seconds
Time to Execute: 6.66 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.84 seconds
Time to Execute: 6.73 seconds
Time to Execute: 6.91 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.88 seconds
Time to Execute: 6.92 seconds
Time to Execute: 6.7 seconds
Time to Execute: 6.9 seconds
Time to Execute: 6.91 seconds
Time to Execute:

Time to Execute: 6.81 seconds
Time to Execute: 6.78 seconds
Time to Execute: 6.84 seconds
Time to Execute: 6.79 seconds
Time to Execute: 6.95 seconds
Time to Execute: 6.86 seconds
Time to Execute: 6.86 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.86 seconds
Time to Execute: 6.72 seconds
Time to Execute: 6.89 seconds
Time to Execute: 6.96 seconds
Time to Execute: 6.82 seconds
Time to Execute: 6.86 seconds
Time to Execute: 6.89 seconds
Time to Execute: 7.05 seconds
Time to Execute: 6.76 seconds
Time to Execute: 6.84 seconds
Time to Execute: 6.94 seconds
Time to Execute: 6.79 seconds
Time to Execute: 6.72 seconds
Time to Execute: 6.73 seconds
Time to Execute: 6.96 seconds
Time to Execute: 6.73 seconds
Time to Execute: 6.94 seconds
Time to Execute: 6.9 seconds
Time to Execute: 6.9 seconds
Time to Execute: 6.86 seconds
Time to Execute: 6.96 seconds
Time to Execute: 7.02 seconds
Time to Execute: 6.82 seconds
Time to Execute: 6.91 seconds
Time to Execute: 6.96 seconds
Time to Exec

Time to Execute: 6.83 seconds
Time to Execute: 6.78 seconds
Time to Execute: 6.89 seconds
Time to Execute: 6.94 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.77 seconds
Time to Execute: 6.96 seconds
Time to Execute: 6.86 seconds
Time to Execute: 6.83 seconds
Time to Execute: 6.79 seconds
Time to Execute: 6.91 seconds
Time to Execute: 6.8 seconds
Time to Execute: 6.74 seconds
Time to Execute: 6.73 seconds
Time to Execute: 7.0 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.71 seconds
Time to Execute: 6.88 seconds
Time to Execute: 6.78 seconds
Time to Execute: 6.78 seconds
Time to Execute: 6.87 seconds
Time to Execute: 6.89 seconds
Time to Execute: 6.89 seconds
Time to Execute: 6.97 seconds
Time to Execute: 6.78 seconds
Time to Execute: 6.76 seconds
Time to Execute: 6.85 seconds
Time to Execute: 6.9 seconds
Time to Execute: 6.72 seconds
Time to Execute: 6.84 seconds
Time to Execute: 6.91 seconds
Time to Execute: 6.9 seconds
Time to Execute: 6.93 seconds
Time to Execut

Time to Execute: 15.5 seconds
Time to Execute: 7.2 seconds
Time to Execute: 6.88 seconds
Time to Execute: 25.28 seconds
Time to Execute: 16.74 seconds
Time to Execute: 25.76 seconds
Time to Execute: 13.69 seconds
Time to Execute: 11.86 seconds
Time to Execute: 13.85 seconds
Time to Execute: 33.65 seconds
Time to Execute: 15.26 seconds
Time to Execute: 20.02 seconds
Time to Execute: 56.17 seconds
Time to Execute: 19.7 seconds
Time to Execute: 55.93 seconds
Time to Execute: 18.99 seconds
Time to Execute: 50.76 seconds
Time to Execute: 18.36 seconds
Time to Execute: 55.51 seconds
Time to Execute: 19.0 seconds
Time to Execute: 53.76 seconds
Time to Execute: 18.37 seconds
Time to Execute: 53.66 seconds
Time to Execute: 17.9 seconds
Time to Execute: 57.41 seconds
Time to Execute: 16.89 seconds
Time to Execute: 55.76 seconds
Time to Execute: 17.08 seconds
Time to Execute: 57.59 seconds
Time to Execute: 16.89 seconds
Time to Execute: 57.7 seconds
Time to Execute: 17.54 seconds
Time to Execute:

Time to Execute: 55.83 seconds
Time to Execute: 53.82 seconds
Time to Execute: 53.64 seconds
Time to Execute: 54.9 seconds
Time to Execute: 50.15 seconds
Time to Execute: 53.55 seconds
Time to Execute: 54.36 seconds
Time to Execute: 58.32 seconds
Time to Execute: 53.38 seconds
Time to Execute: 56.29 seconds
Time to Execute: 52.17 seconds
Time to Execute: 55.78 seconds
Time to Execute: 51.83 seconds
Time to Execute: 51.07 seconds
Time to Execute: 51.41 seconds
Time to Execute: 51.3 seconds
Time to Execute: 51.06 seconds
Time to Execute: 52.51 seconds
Time to Execute: 50.23 seconds
Time to Execute: 50.7 seconds
Time to Execute: 56.67 seconds
Time to Execute: 54.51 seconds
Time to Execute: 55.39 seconds
Time to Execute: 50.96 seconds
Time to Execute: 54.17 seconds
Time to Execute: 52.03 seconds
Time to Execute: 50.19 seconds
Time to Execute: 52.96 seconds
Time to Execute: 51.91 seconds
Time to Execute: 53.99 seconds
Time to Execute: 53.56 seconds
Time to Execute: 54.85 seconds
Time to Exe

79080

### Test Data using Implicit Join
Join Songs and Audio_Features Tables implicitly

In [13]:
test_track = display_time(session.query(Playlists, Audio_Features)
                          .filter(Playlists.track_uri==Audio_Features.track_uri)
                          .distinct()
                          .first)

Time to Execute: 0.03 seconds


In [14]:
test_track.Audio_Features.acousticness

0.974

In [15]:
for tk in test_track.keys():
    print (tk)
    tk_dict = test_track.__getattribute__(tk).__dict__
    for k,v in tk_dict.items():
        
        print ("\t{}  :  {}".format(k,v))

Playlists
	_sa_instance_state  :  <sqlalchemy.orm.state.InstanceState object at 0x1a2a202dd0>
	pos  :  b'\x00\x00\x00\x00\x00\x00\x00\x00'
	id  :  1
	track_uri  :  spotify:track:2d7LPtieXdIYzf7yHPooWd
	track_name  :  Chasing Cars
	duration_ms  :  b'\x84\xb3\x03\x00\x00\x00\x00\x00'
	playlist_id  :  1
	artist_name  :  Sleeping At Last
	artist_uri  :  spotify:artist:0MeLMJJcouYXCymQSHPn8g
	album_uri  :  spotify:album:0UIIvTTWNB3gRQWFoxoEDh
	album_name  :  Covers, Vol. 2
Audio_Features
	_sa_instance_state  :  <sqlalchemy.orm.state.InstanceState object at 0x1a2a202e50>
	danceability  :  0.467
	key  :  11
	mode  :  1
	acousticness  :  0.974
	liveness  :  0.0816
	tempo  :  108.13
	duration_ms  :  242564
	loudness  :  -9.649
	energy  :  0.157
	track_uri  :  spotify:track:2d7LPtieXdIYzf7yHPooWd
	speechiness  :  0.0336
	instrumentalness  :  1.46e-06
	valence  :  0.277
	time_signature  :  4


In [16]:
for tk in test_track.keys():
    print (tk)
    tk_dict = test_track.__getattribute__(tk).__dict__
    for k,v in tk_dict.items():
        print ("\t{}  :  {}".format(k,type(v)))

Playlists
	_sa_instance_state  :  <class 'sqlalchemy.orm.state.InstanceState'>
	pos  :  <class 'bytes'>
	id  :  <class 'int'>
	track_uri  :  <class 'str'>
	track_name  :  <class 'str'>
	duration_ms  :  <class 'bytes'>
	playlist_id  :  <class 'int'>
	artist_name  :  <class 'str'>
	artist_uri  :  <class 'str'>
	album_uri  :  <class 'str'>
	album_name  :  <class 'str'>
Audio_Features
	_sa_instance_state  :  <class 'sqlalchemy.orm.state.InstanceState'>
	danceability  :  <class 'float'>
	key  :  <class 'int'>
	mode  :  <class 'int'>
	acousticness  :  <class 'float'>
	liveness  :  <class 'float'>
	tempo  :  <class 'float'>
	duration_ms  :  <class 'int'>
	loudness  :  <class 'float'>
	energy  :  <class 'float'>
	track_uri  :  <class 'str'>
	speechiness  :  <class 'float'>
	instrumentalness  :  <class 'float'>
	valence  :  <class 'float'>
	time_signature  :  <class 'int'>


### Playlist 'position' and 'duration' fields are binary for some reason.  
The following will update the db records to be integer values

In [28]:
int.from_bytes(test_track.Playlists.pos, byteorder='little')

0

In [34]:
byte_track = display_time(session.query(Playlists)
                          .filter(Playlists.track_name=="Photograph")
                          .distinct()
                          .first)

Time to Execute: 0.01 seconds


In [35]:
int.from_bytes(byte_track.pos, byteorder='little')

11

In [36]:
int.from_bytes(byte_track.duration_ms, byteorder='little')

258986

In [37]:
# Iterate through Playlists
# replace the value in postion with the int value 
# replace duration with the int value
# only do replacements where the value is of type 'byte'

In [8]:
def get_batch(fr_entry:int, to_entry:int, TableName):
    batch = session.query(TableName).filter(and_(TableName.id>=fr_entry, 
                                                 TableName.id<to_entry)).all()
    return batch

In [190]:
batch_size     =    1
start_record   = 35639100
stop_record    = 35639200
length         = stop_record - start_record #session.query(Playlists).count()-start
batches        = int(length/batch_size) # int((length-start)/batch_size)

In [191]:
batch_size, start_record, stop_record, length, batches

(1, 35639100, 35639200, 100, 100)

In [192]:
length, batches

(100, 100)

In [193]:
# convert byte value in pos and duration field to integer
exception_ranges = []

for b in tqdm(range(batches)):
    
    start = start_record + (b*batch_size)
    end   = start_record + (b*batch_size) + batch_size
    if end > stop_record+1:
        end = stop_record+1
        
    for e in get_batch(start,end,Playlists):
        
        commit = False
        if type(e.pos) == bytes:
            int_posval = int.from_bytes(e.pos, byteorder='little')
            e.pos = int_posval
            commit = True

        if type(e.duration_ms) == bytes:
            int_durval = int.from_bytes(e.duration_ms, byteorder='little')
            e.duration_ms = int_durval
            commit = True
    
    # only do a commit if we changed something
    if commit:
        try:
            session.commit()
        except Exception as e:
            session.rollback()
            print("Caught Exception: {}".format (e))
            print("\t\tIn record range: {} - {}".format(start,end))
            exception_ranges.append((start,end))
            
    
    
        
if len(exception_ranges)>0:
    print("Review Exception Ranges:")
    for e in exception_ranges:
        print ("from: {}  to: {}".format(e[0], e[1]))

HBox(children=(IntProgress(value=0), HTML(value='')))

### Found that record 35639172 had an incorrect duration_ms value
manually correcting it

In [9]:
b = get_batch(35639173, 35639174, Playlists)

In [10]:
int.from_bytes(b[0].pos, byteorder='little')

TypeError: cannot convert 'int' object to bytes

In [11]:
b[0].pos

29

In [12]:
int.from_bytes(b[0].duration_ms, byteorder='little')/1000

TypeError: cannot convert 'int' object to bytes

In [13]:
b[0].duration_ms

60132

In [213]:
b[0].track_uri

'spotify:track:6lrtPENGGkx8InNienHt5E'

In [205]:
get_spotify_data(b[0].track_uri, 'duration_ms')

60132

In [187]:
b[0].duration_ms = 1947

In [188]:
session.commit()