In [1]:
import pandas as pd

In [2]:
!pip install spotipy



In [13]:
__all__ = [
    'CacheHandler',
    'CacheFileHandler',
    'DjangoSessionCacheHandler',
    'FlaskSessionCacheHandler',
    'MemoryCacheHandler',
    'RedisCacheHandler']

import errno
import json
import logging
import os
from spotipy.util import CLIENT_CREDS_ENV_VARS

from redis import RedisError

logger = logging.getLogger(__name__)


class CacheHandler():
    """
    An abstraction layer for handling the caching and retrieval of
    authorization tokens.

    Custom extensions of this class must implement get_cached_token
    and save_token_to_cache methods with the same input and output
    structure as the CacheHandler class.
    """

    def get_cached_token(self):
        """
        Get and return a token_info dictionary object.
        """
        # return token_info
        raise NotImplementedError()

    def save_token_to_cache(self, token_info):
        """
        Save a token_info dictionary object to the cache and return None.
        """
        raise NotImplementedError()
        return None


class CacheFileHandler(CacheHandler):
    """
    Handles reading and writing cached Spotify authorization tokens
    as json files on disk.
    """

    def __init__(self,
                 cache_path=None,
                 username=None,
                 encoder_cls=None):
        """
        Parameters:
             * cache_path: May be supplied, will otherwise be generated
                           (takes precedence over `username`)
             * username: May be supplied or set as environment variable
                         (will set `cache_path` to `.cache-{username}`)
             * encoder_cls: May be supplied as a means of overwriting the
                        default serializer used for writing tokens to disk
        """
        self.encoder_cls = encoder_cls
        if cache_path:
            self.cache_path = cache_path
        else:
            cache_path = ".cache"
            username = (username or os.getenv(CLIENT_CREDS_ENV_VARS["client_username"]))
            if username:
                cache_path += "-" + str(username)
            self.cache_path = cache_path

    def get_cached_token(self):
        token_info = None

        try:
            f = open(self.cache_path)
            token_info_string = f.read()
            f.close()
            token_info = json.loads(token_info_string)

        except IOError as error:
            if error.errno == errno.ENOENT:
                logger.debug("cache does not exist at: %s", self.cache_path)
            else:
                logger.warning("Couldn't read cache at: %s", self.cache_path)

        return token_info

    def save_token_to_cache(self, token_info):
        try:
            f = open(self.cache_path, "w")
            f.write(json.dumps(token_info, cls=self.encoder_cls))
            f.close()
        except IOError:
            logger.warning('Couldn\'t write token to cache at: %s',
                           self.cache_path)


class MemoryCacheHandler(CacheHandler):
    """
    A cache handler that simply stores the token info in memory as an
    instance attribute of this class. The token info will be lost when this
    instance is freed.
    """

    def __init__(self, token_info=None):
        """
        Parameters:
            * token_info: The token info to store in memory. Can be None.
        """
        self.token_info = token_info

    def get_cached_token(self):
        return self.token_info

    def save_token_to_cache(self, token_info):
        self.token_info = token_info


class DjangoSessionCacheHandler(CacheHandler):
    """
    A cache handler that stores the token info in the session framework
    provided by Django.

    Read more at https://docs.djangoproject.com/en/3.2/topics/http/sessions/
    """

    def __init__(self, request):
        """
        Parameters:
            * request: HttpRequest object provided by Django for every
            incoming request
        """
        self.request = request

    def get_cached_token(self):
        token_info = None
        try:
            token_info = self.request.session['token_info']
        except KeyError:
            logger.debug("Token not found in the session")

        return token_info

    def save_token_to_cache(self, token_info):
        try:
            self.request.session['token_info'] = token_info
        except Exception as e:
            logger.warning("Error saving token to cache: " + str(e))


class FlaskSessionCacheHandler(CacheHandler):
    """
    A cache handler that stores the token info in the session framework
    provided by flask.
    """

    def __init__(self, session):
        self.session = session

    def get_cached_token(self):
        token_info = None
        try:
            token_info = self.session["token_info"]
        except KeyError:
            logger.debug("Token not found in the session")

        return token_info

    def save_token_to_cache(self, token_info):
        try:
            self.session["token_info"] = token_info
        except Exception as e:
            logger.warning("Error saving token to cache: " + str(e))


class RedisCacheHandler(CacheHandler):
    """
    A cache handler that stores the token info in the Redis.
    """

    def __init__(self, redis, key=None):
        """
        Parameters:
            * redis: Redis object provided by redis-py library
            (https://github.com/redis/redis-py)
            * key: May be supplied, will otherwise be generated
                   (takes precedence over `token_info`)
        """
        self.redis = redis
        self.key = key if key else 'token_info'

    def get_cached_token(self):
        token_info = None
        try:
            token_info = self.redis.get(self.key)
            if token_info:
                return json.loads(token_info)
        except RedisError as e:
            logger.warning('Error getting token from cache: ' + str(e))

        return token_info

    def save_token_to_cache(self, token_info):
        try:
            self.redis.set(self.key, json.dumps(token_info))
        except RedisError as e:
            logger.warning('Error saving token to cache: ' + str(e))

In [2]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [3]:
spotipy_client_id='641869fabf814412bb6d32728560e60c'
spotipy_client_secret='4e69a59fbbf947b1932d964f6ed14c30'

In [4]:
auth_manager = SpotifyClientCredentials('641869fabf814412bb6d32728560e60c','4e69a59fbbf947b1932d964f6ed14c30')
sp = spotipy.Spotify(auth_manager=auth_manager)

urn = 'spotify:artist:4KWTAlx2RvbpseOGMEmROg'



artist = sp.artist(urn)
artist

KeyboardInterrupt: 

In [17]:

urn = 'spotify:track:2C4aYxNpoPkmoZ3ZdPeuOB'



track = sp.track(urn)
track['album']['release_date']

'1991-09-18'

In [64]:
list = ['2KnLkZ3z7PO3kgVGHGqDpD',
 '6QgjcU0zLnzq5OrUoSZ3OK',
 '1ei3hzQmrgealgRKFxIcWn',
 '10wMZu2Y9o55ylw6mRSiKB',
 '05WHgjtWx32x40VmFkLnOd',
 '12HaeiaS5UJgoxvnLgU5ro',
 '5PYQUBXc7NYeI1obMKSJK0',
 '287Fa4maOumSHlDoPiGmv9',
 '4Sib57MmYGJzSvkW84jTwh',
 '6RyaV7owmVU6fzEPE17sF1',
 '5uSFGgIfHMT3osrAd9n9ym',
 '4htAwcUcrqPn4RgdeJelzq',
 '5eWgDlp3k6Tb5RD8690s6I',
 '3vv9phIu6Y1vX3jcqaGz5Z',
 '2rYbTiqoxpIYRm4lYiizzb',
 '2hsA9YWuF3C9Gosi4dpjJN',
 '5TmZYdHW6HJ9gt7Aokuql5',
 '63CSozvYUEudPp12679UVF',
 '7Ckhk1XW5NV2k4jpqtQNlz',
 '72gGeQ0dZ7E8RlBeNJNJkb',
 '02WacdrRpm4zlP8H7X6bnQ',
 '0nrRP2bk19rLc0orkWPQk2',
 '2jt2WxXMCD4zjACthkJQVE',
 '0VhgEqMTNZwYL1ARDLLNCX',
 '07nH4ifBxUB4lZcsf44Brn',
 '2xdwCJLfKCkat14mlgR26M',
 '5pY3ovFxbvAg7reGZjJQSp',
 '4UJBJ4KqLFvAv8xJvYcRxq',
 '6LcauUZjF1eXQrgqMUecHX',
 '2ekn2ttSfGqwhhate0LSR0']

In [65]:
auth_manager = SpotifyClientCredentials(spotipy_client_id, spotipy_client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

release_dates = []

for i in tqdm(list):
    urn = f'spotify:track:{i}'

    track = sp.track(urn)
    date = track['album']['release_date']
    release_dates.append(date)

  0%|                                                                    | 0/30 [00:10<?, ?it/s]


KeyboardInterrupt: 

In [None]:
release_dates

In [15]:
df = pd.read_csv('./charts.csv')
df

Unnamed: 0,track_id,name,country,date,position,streams,artists,artist_genres,duration,explicit
0,20IvMlpi4U5RuDnAlXSRiV,Crackküche,de,2021-04-15,82,625718,['Haftbefehl'],['german hip hop'],198746,False
1,0V1K6MU0utODk4yNqZKsFv,WATER,jp,2019-01-31,171,50896,['Suchmos'],"['japanese r&b', 'j-rock', 'japanese soul', 'j...",408320,False
2,4qzZm5EIdFurBpDieEmVc9,Gözleri Aşka Gülen,tr,2018-11-15,59,185439,['Nilipek.'],"['turkish singer-songwriter', 'turkish rock']",257142,False
3,4qzZm5EIdFurBpDieEmVc9,Gözleri Aşka Gülen,tr,2018-11-22,133,111159,['Nilipek.'],"['turkish singer-songwriter', 'turkish rock']",257142,False
4,4qzZm5EIdFurBpDieEmVc9,Gözleri Aşka Gülen,tr,2018-11-29,166,96204,['Nilipek.'],"['turkish singer-songwriter', 'turkish rock']",257142,False
...,...,...,...,...,...,...,...,...,...,...
5299303,7731grGgxDd7aBNpGOjm9k,När löven faller,se,2019-11-28,163,165644,['Ebbot Lundberg'],['swedish alternative rock'],252798,False
5299304,4Vm9XIi9AD1oK39xACmqk5,ワガママで誤魔化さないで,jp,2019-01-17,161,49580,['THE ORAL CIGARETTES'],"['j-poprock', 'anime', 'j-rock', 'japanese ind...",257270,False
5299305,4Vm9XIi9AD1oK39xACmqk5,ワガママで誤魔化さないで,jp,2019-01-24,139,56826,['THE ORAL CIGARETTES'],"['j-poprock', 'anime', 'j-rock', 'japanese ind...",257270,False
5299306,4Vm9XIi9AD1oK39xACmqk5,ワガママで誤魔化さないで,jp,2019-01-31,151,53897,['THE ORAL CIGARETTES'],"['j-poprock', 'anime', 'j-rock', 'japanese ind...",257270,False


In [11]:
track_ids = df['track_id'].to_list()

In [12]:
len(track_ids)

5299308

In [None]:
from tqdm import tqdm

In [14]:
auth_manager = SpotifyClientCredentials(spotipy_client_id, spotipy_client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

release_dates = []

for i in tqdm(track_ids):
    urn = f'spotify:track:{i}'

    track = sp.track(urn)
    date = track['album']['release_date']
    release_dates.append(date)

  0%|                                                                                                 | 0/5299308 [01:27<?, ?it/s]


KeyboardInterrupt: 

In [None]:
df['release_date'] = release_dates

In [18]:
df['track_id'].nunique()

102947

In [52]:
track_ids = df['track_id'].iloc[0:31].tolist()

In [55]:
len(track_ids)

31

In [43]:
print(*track_ids, sep = ", ")

20IvMlpi4U5RuDnAlXSRiV, 0V1K6MU0utODk4yNqZKsFv, 4qzZm5EIdFurBpDieEmVc9, 4qzZm5EIdFurBpDieEmVc9, 4qzZm5EIdFurBpDieEmVc9, 4qzZm5EIdFurBpDieEmVc9, 1vWrdDoVve3adC23brWRke, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 0N8PEQ33Ba841py3SEV0Wp, 6BIfloI765jfr51Se2xSdw, 6BIfloI765jfr51Se2xSdw, 6BIfloI765jfr51Se2xSdw, 5m4y68zsFwsix7ng4cLB1j, 5m4y68zsFwsix7ng4cLB1j, 5m4y68zsFwsix7ng4cLB1j, 5m4y68zsFwsix7ng4cLB1j, 5m4y68zsFwsix7ng4cLB1j, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFdtUhbAM, 5NcTOxzo2WE20tFd

In [60]:
auth_manager = SpotifyClientCredentials(spotipy_client_id, spotipy_client_secret)
sp = spotipy.Spotify(auth_manager=auth_manager)

In [57]:
import time

In [61]:
release_dates = []

for i in tqdm(track_ids):
    urn = f'spotify:track:{i}'
    track = sp.track(urn)
    date = track['album']['release_date']
    release_dates.append(date)
    time.sleep(10)

  0%|                                                                    | 0/31 [00:26<?, ?it/s]


KeyboardInterrupt: 

In [29]:
df_ger = df[df['country']=='de']
df_ger

Unnamed: 0,track_id,name,country,date,position,streams,artists,artist_genres,duration,explicit
0,20IvMlpi4U5RuDnAlXSRiV,Crackküche,de,2021-04-15,82,625718,['Haftbefehl'],['german hip hop'],198746,False
21,5m4y68zsFwsix7ng4cLB1j,Teamgeist,de,2015-12-03,43,444549,['Alligatoah'],['german pop'],238933,False
22,5m4y68zsFwsix7ng4cLB1j,Teamgeist,de,2015-12-10,54,343488,['Alligatoah'],['german pop'],238933,False
23,5m4y68zsFwsix7ng4cLB1j,Teamgeist,de,2015-12-17,102,214546,['Alligatoah'],['german pop'],238933,False
62,3UPXsxC1h5bPuXhBMAcWo7,NEXTLEVELSHIT,de,2016-11-03,186,169363,['DAT ADAM'],"['german cloud rap', 'deep german hip hop']",168573,False
...,...,...,...,...,...,...,...,...,...,...
5299146,20opc3dykV1sSxVwaGLUMc,Warum,de,2020-09-10,135,567554,['KAYEF'],"['german r&b', 'german pop', 'german hip hop']",170946,False
5299147,20opc3dykV1sSxVwaGLUMc,Warum,de,2020-09-17,135,594811,['KAYEF'],"['german r&b', 'german pop', 'german hip hop']",170946,False
5299148,20opc3dykV1sSxVwaGLUMc,Warum,de,2020-09-24,169,533979,['KAYEF'],"['german r&b', 'german pop', 'german hip hop']",170946,False
5299149,20opc3dykV1sSxVwaGLUMc,Warum,de,2020-10-01,181,490756,['KAYEF'],"['german r&b', 'german pop', 'german hip hop']",170946,False


In [31]:
df_ger['track_id'].nunique()

8781