In [None]:
import os
import sys
import math
import time
import numpy as numpy
import pandas as pd
import tekore as tk

from pprint import PrettyPrinter

# Module-level PrettyPrinter instance; presumably kept for inspecting nested
# inventories interactively (no use is visible in this part of the notebook).
pp = PrettyPrinter()

In [None]:
class SpotifyCleaner:
    """Inventory an exported Spotify library and find songs saved in several versions.

    The export is read from a folder named ``Spotify_Data-<user id>`` that is
    expected to contain ``Saved_Library/Liked_Songs.csv``,
    ``Saved_Library/Liked_Albums.csv`` and a ``Playlists`` directory of CSV
    files. Every CSV needs an ``artist`` column; playlist files also need
    ``name``, ``album`` and ``id`` columns.

    Attributes:
        spotify: ``tekore.Spotify`` client created in ``__init__``.
        user_id: Id of the current Spotify user.
        root_dir: Name of the folder containing the exported data.
        store: ``{artist: {'Songs': {song_title: [(song_id, album_title), ...]}}}``
            or ``None`` until ``create_inventory_by_artist`` has run.
        empty_store: Same artists as ``store`` with empty ``'Songs'`` dicts.
        cleaned_store: Initialised to ``None``; not populated by this class yet.
        multiple_versions_store: ``{artist: {'Songs': {song_title: [index, ...]}}}``
            where each index points into the list returned by
            ``find_multiple_version_songs``; ``None`` until that method has run.
    """

    def __init__(self, conf):
        """Request a user token, create the Spotify client and reset the inventories.

        Args:
            conf: Sequence unpacked positionally into
                ``tk.prompt_for_user_token`` (for example the value returned
                by ``tk.config_from_file``).
        """
        token = tk.prompt_for_user_token(*conf, scope=tk.scope.every)
        self.spotify = tk.Spotify(token, chunked_on=True)
        self.user_id = self.spotify.current_user().id
        self.root_dir = 'Spotify_Data-' + self.user_id  # Name of folder containing exported data
        self.store = None
        self.empty_store = None
        self.cleaned_store = None
        self.multiple_versions_store = None

    def _read_playlists(self):
        """Yield one DataFrame per non-hidden file in the ``Playlists`` folder."""
        playlist_dir = os.path.join(self.root_dir, 'Playlists')
        # Sorted so that the processing order (and therefore the order of the
        # versions recorded per song) does not depend on the file system.
        for filename in sorted(os.listdir(playlist_dir)):
            if filename.startswith("."):
                continue  # hidden files such as .DS_Store are not playlists
            yield pd.read_csv(os.path.join(playlist_dir, filename))

    @staticmethod
    def _unique_artists(df):
        """Return the distinct, non-missing values of the ``artist`` column."""
        # Missing artists are read as NaN; a NaN dictionary key can never be
        # looked up again (NaN != NaN), so they are dropped here.
        return list(df['artist'].dropna().unique())

    def inventory_all_artists(self):
        """Collect every artist found in liked songs, liked albums and playlists.

        Returns:
            set: Distinct artist names across the three sources.
        """
        # Go through saved songs
        songs_filepath = os.path.join(self.root_dir, 'Saved_Library/Liked_Songs.csv')
        all_artists = set(self._unique_artists(pd.read_csv(songs_filepath)))

        # Go through saved albums
        albums_filepath = os.path.join(self.root_dir, 'Saved_Library/Liked_Albums.csv')
        all_artists.update(self._unique_artists(pd.read_csv(albums_filepath)))

        # Go through all playlists
        for playlist_df in self._read_playlists():
            all_artists.update(self._unique_artists(playlist_df))

        return all_artists

    def create_inventory_by_artist(self):
        """Initialise ``store`` and ``empty_store`` with one empty entry per artist."""
        artists = self.inventory_all_artists()
        self.store = {a: {'Songs': {}} for a in artists}
        self.empty_store = {a: {'Songs': {}} for a in artists}

    def inventory_songs_from_playlists(self):
        """Record every playlist track in ``store`` under its artist and title.

        Each song title maps to a list of distinct ``(song_id, album_title)``
        tuples, kept in first-seen order. Rows without an artist are skipped.
        ``create_inventory_by_artist`` must have been called first.
        """
        for playlist_df in self._read_playlists():
            rows = zip(
                playlist_df['artist'],
                playlist_df['name'],
                playlist_df['album'],
                playlist_df['id'],
            )
            for artist, song_title, album_title, song_id in rows:
                if pd.isna(artist):
                    continue  # no artist to file this row under
                versions = self.store[artist]['Songs'].setdefault(song_title, [])
                version = (song_id, album_title)
                # De-duplicate on insert so the list keeps a stable order.
                if version not in versions:
                    versions.append(version)

    def find_multiple_version_songs(self):
        """Find songs stored with more than one ``(song_id, album_title)`` entry.

        Fills ``multiple_versions_store`` so that each such song title maps to
        the positions of its track ids in the returned list.

        Returns:
            list: Track ids of all versions of all multi-version songs.
        """
        # Build a fresh structure: assigning ``self.empty_store`` directly
        # would alias it and the writes below would pollute the template.
        self.multiple_versions_store = {artist: {'Songs': {}} for artist in self.store}
        tracks_to_look_up = []
        for artist, entry in self.store.items():
            for song_title, versions in entry['Songs'].items():
                if len(versions) <= 1:
                    continue
                first_index = len(tracks_to_look_up)
                tracks_to_look_up.extend(song_id for song_id, _album in versions)
                self.multiple_versions_store[artist]['Songs'][song_title] = list(
                    range(first_index, len(tracks_to_look_up))
                )
        return tracks_to_look_up

    def retrieve_tracks_info(self):
        """Look up every multi-version track through the Spotify client.

        Returns:
            The result of ``self.spotify.tracks`` for the ids returned by
            ``find_multiple_version_songs``, or an empty list when there is
            nothing to look up.
        """
        tracks_to_look_up = self.find_multiple_version_songs()
        if not tracks_to_look_up:
            return []  # nothing to ask the API for
        all_tracks = self.spotify.tracks(tracks_to_look_up, market='from_token')
        return all_tracks

    def create_mapping_inventory(self):
        """Unfinished: currently only retrieves the track info and returns ``None``."""
        # either use the info from spotify or just look at the album names
        # probably harder to do album names
        all_tracks = self.retrieve_tracks_info()
        # then go through the multiple versions store
        # for each song_title
            # take the listed indexes of all_tracks
            # look at info
            # prioritize according to decided strategy

In [None]:
# Read the tekore configuration from the local 'tekore.cfg' file and build the
# cleaner. Constructing SpotifyCleaner requests a user token and queries the
# Spotify API for the current user, so this cell has side effects.
conf = tk.config_from_file('tekore.cfg')
Cleaner = SpotifyCleaner(conf)

In [None]:
# Order matters: create_inventory_by_artist() populates Cleaner.store, which
# inventory_songs_from_playlists() then fills with the playlist tracks.
Cleaner.create_inventory_by_artist()
Cleaner.inventory_songs_from_playlists()

In [None]:
# Total number of distinct song titles recorded across all artists.
num_songs = sum(len(entry['Songs']) for entry in Cleaner.store.values())

print(num_songs)