In [1]:
import json, os, sys, math, time, random, requests as req
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

In [2]:
import librosa 

In [20]:
# Spotify API credentials. They are read from the environment so the real
# values never have to be pasted into (and committed with) the notebook:
# export SPOTIPY_CLIENT_ID / SPOTIPY_CLIENT_SECRET before starting Jupyter.
# Both fall back to the empty string when the variable is unset.
SPOTIPY_CLIENT_ID = os.environ.get('SPOTIPY_CLIENT_ID', '')
SPOTIPY_CLIENT_SECRET = os.environ.get('SPOTIPY_CLIENT_SECRET', '')

# Shared Spotify client (client-credentials flow) used by the cells below.
sp = spotipy.Spotify(
    client_credentials_manager=SpotifyClientCredentials(
        client_id=SPOTIPY_CLIENT_ID,
        client_secret=SPOTIPY_CLIENT_SECRET,
    )
)

In [5]:
# Load the song dictionary to look up. Later cells iterate `searching.items()`
# and read each value's 'title' and 'artist' entries.
# JSON text is UTF-8; being explicit avoids decode errors on platforms whose
# default encoding is something else.
with open('msd-data/searchin.json', 'r', encoding='utf-8') as f:
    searching = json.load(f)

In [18]:
# Scratch file that every downloaded preview is written to (and overwritten in);
# the analysis cells load the audio back from this same path.
AUDIO_FILE = 'funny.mp3'

def download_preview(song: dict):
    """Find a song on Spotify and fetch its preview clip.

    The top search hit for ``song['title']`` / ``song['artist']`` is taken and
    its preview URL is handed to ``download_preview_url``.

    Returns:
        A ``(track_id, downloaded)`` tuple. ``('', False)`` is returned when the
        search is rejected with HTTP 400 / code -1 or yields no hits; otherwise
        the Spotify track id plus the result of ``download_preview_url``.

    Raises:
        spotipy.SpotifyException: for any other Spotify error.
    """
    query = f"track:{song['title']} artist:{song['artist']}"

    try:
        result = sp.search(q=query, offset=0, type='track', limit=1)
    except spotipy.SpotifyException as err:
        rejected = err.code == -1 and err.http_status == 400
        if not rejected:
            raise
        return '', False

    hits = result['tracks']['items']
    if len(hits) == 0:
        return '', False

    best = hits[0]
    preview = best['preview_url']
    return best['id'], download_preview_url(preview)

def download_preview_url(url: str, timeout: float = 30.0) -> bool:
    """Download a preview clip and store it at ``AUDIO_FILE``.

    Args:
        url: Address of the clip. ``None``, the literal string ``'None'`` and
            the empty string all mean "no preview available".
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang a long crawl forever.

    Returns:
        True when the clip was written to ``AUDIO_FILE``. False when there is no
        URL, the server answered with an error status, or the body was empty;
        in those cases ``AUDIO_FILE`` is left untouched.

    Raises:
        requests.exceptions.RequestException: network-level failures
            (including timeouts) propagate to the caller.
    """
    if not url or url == 'None':
        return False

    # req.get opens and closes its own session, so no connection is leaked.
    response = req.get(url, timeout=timeout)

    # Never save an HTTP error page (or nothing at all) as if it were audio.
    if not response.ok or not response.content:
        return False

    with open(AUDIO_FILE, 'wb') as f:
        f.write(response.content)

    return True


In [7]:
# Flatten the song dictionary into an ordered list of (key, value) pairs so the
# crawl can be resumed from a numeric offset.
searching_l = list(searching.items())
# Real shallow copy (previously just a second name for the same list), so
# changing one list no longer silently changes the other.
searching_l_copy = list(searching_l)

In [None]:
# Extract MFCC features for every song in `searching_l_copy`, one JSON file per
# song key. Set `offset` to the last printed index to resume an interrupted run.
offset = 0
os.makedirs('msd-data/mfcc-features', exist_ok=True)
for i, (song_key, value) in enumerate(searching_l_copy[offset:]):
    if i % 5 == 0:
        print(f'Analyzing {song_key} (#{i+offset}) | timestamp: {time.strftime("%I:%M:%S", time.localtime())}', end='\r')

    # download_preview returns a (track_id, success) tuple. A non-empty tuple is
    # always truthy, so the success flag has to be checked explicitly; otherwise
    # a failed download would re-analyse the previous song's audio file and save
    # it under this song's key.
    downloaded = download_preview(value)[1]
    if downloaded:
        y, sr = librosa.load(AUDIO_FILE, sr=44100)

        # 15 coefficients per frame, truncated to at most 1200 frames, rounded
        # to 3 decimals to keep the JSON files small.
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=2048, n_mfcc=15, hop_length=1024)
        mfcc = mfcc[:, :1200]
        listm = [[round(float(c), 3) for c in l] for l in mfcc]

        with open(f'msd-data/mfcc-features/{song_key}.json', 'w') as f:
            json.dump(listm, f)

    # Pause after every Spotify search, successful or not (presumably to stay
    # under the API rate limit).
    time.sleep(0.95)

In [5]:
# Breadth-first crawl over Spotify recommendations: analyse a track, then queue
# the tracks Spotify recommends for it. Each entry is (track_id, preview_url).
to_explore = [('3lPr8ghNDBLc2uZovNyLs9', "https://p.scdn.co/mp3-preview/f4fe399267e6093182d576a6c84c0e081c81ff90?cid=9f313b6479844355af4354c1c85d1918")]
# Every id that has ever been queued, so a track is analysed at most once and
# the queue cannot grow without bound.
queued_ids = {entry[0] for entry in to_explore}
os.makedirs('msd-data/mfcc-features', exist_ok=True)
i = 0
# Stop cleanly when the queue runs dry instead of raising IndexError on pop.
while to_explore:
    track_id, url = to_explore.pop(0)
    print(f'Analyzing {track_id} (#{i}) | timestamp: {time.strftime("%I:%M:%S", time.localtime())}', end='\r')

    # Tracks without a usable preview are skipped (no network call was made for
    # a missing URL, so no pause is needed here).
    if not download_preview_url(url):
        continue

    y, sr = librosa.load(AUDIO_FILE, sr=44100)

    # 15 coefficients per frame, truncated to at most 1200 frames, rounded to
    # 3 decimals to keep the JSON files small.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=2048, n_mfcc=15, hop_length=1024)
    mfcc = mfcc[:, :1200]
    listm = [[round(float(c), 3) for c in l] for l in mfcc]

    with open(f'msd-data/mfcc-features/{track_id}.json', 'w') as f:
        json.dump(listm, f)

    # Seed the recommendations with the track just analysed (previously a fixed,
    # unrelated track id was used for every iteration).
    neighbors = sp.recommendations(seed_tracks=[track_id])['tracks']
    for neighbor in neighbors or []:
        if neighbor['id'] not in queued_ids:
            queued_ids.add(neighbor['id'])
            to_explore.append((neighbor['id'], neighbor['preview_url']))

    time.sleep(2.5)

    i += 1

Analyzing 7ivyP9RzZ9NvK2ZZ74lISK (#201) | timestamp: 12:55:07

In [5]:
# Last.fm API key. Read from the LAST_FM_API_KEY environment variable so the
# real key is never stored in the notebook; empty string when unset.
LAST_FM_API = os.environ.get('LAST_FM_API_KEY', '')

In [39]:
def last_fm_get_simiar(artist: str, track: str, timeout: float = 10.0) -> 'list | None':
    """Ask Last.fm for tracks similar to the given one.

    Args:
        artist: Artist name.
        track: Track title.
        timeout: Seconds to wait for Last.fm before giving up.

    Returns:
        The value of ``similartracks.track`` from the ``track.getSimilar``
        response, or ``None`` when the request fails, the body is not valid
        JSON, or the payload has no ``similartracks`` entry (e.g. an error
        response).
    """
    params = {
        'api_key': LAST_FM_API,
        'method': 'track.getSimilar',
        'artist': artist,
        'track': track,
        'format': 'json',
        'autocorrect': 1
    }

    try:
        # HTTPS so the API key in the query string is not sent in clear text.
        response = req.get('https://ws.audioscrobbler.com/2.0/', params=params, timeout=timeout)
        return response.json()['similartracks']['track']
    except (req.exceptions.RequestException, ConnectionError):
        # requests raises its own exception hierarchy; the builtin
        # ConnectionError alone does not catch it.
        return None
    except (ValueError, KeyError, TypeError):
        # ValueError covers JSON decoding errors; KeyError/TypeError cover
        # payloads that lack the expected 'similartracks' -> 'track' structure.
        return None

In [None]:
# Smoke-test the Last.fm lookup on a single track; the bare `tracks` expression
# on the last line makes the notebook display the result.
tracks = last_fm_get_simiar('Foster the people', 'pumped up kicks')
tracks

In [36]:
# Smoke-test the Spotify search + preview download for a single song; the
# displayed value is the (track_id, success) tuple.
download_preview({'title': 'Lady - Hear me tonight', 'artist': 'Modjo'})

('49X0LAl6faAusYq02PRAY6', True)

In [None]:
# Breadth-first crawl over Last.fm "similar tracks": for each (artist, title)
# pair, fetch the Spotify preview, queue the pair's similar tracks, and save
# MFCC features for previews that have not been analysed yet.
to_explore = [('Foster the people', 'pumped up kicks')]
explored = set()            # pairs that have been taken off the queue
queued = set(to_explore)    # pairs ever put on the queue, so none is queued twice
os.makedirs('msd-data/mfcc-features', exist_ok=True)
i = 0
# Stop cleanly when the queue runs dry instead of raising IndexError on pop.
while to_explore:
    artist, track = to_explore.pop(0)

    explored.add((artist, track))

    track_id, success = download_preview({'title': track, 'artist': artist})

    if success:
        print(f'Analyzing {artist}|{track} (#{i}) | Track ID: {track_id} | timestamp: {time.strftime("%I:%M:%S", time.localtime())}', end='\r', flush=True)

        tracks = last_fm_get_simiar(artist, track)
        if tracks is None:
            print('Encountered an error oops')
        else:
            # Queue each similar track once, even if several tracks list it.
            for entry in tracks:
                pair = (entry['artist']['name'], entry['name'])
                if pair not in queued:
                    queued.add(pair)
                    to_explore.append(pair)

            # Skip the feature extraction if this Spotify track was already done.
            if not os.path.exists(f'msd-data/mfcc-features/{track_id}.json'):

                y, sr = librosa.load(AUDIO_FILE, sr=44100)

                # 15 coefficients per frame, truncated to at most 1200 frames,
                # rounded to 3 decimals to keep the JSON files small.
                mfcc = librosa.feature.mfcc(y=y, sr=sr, n_fft=2048, n_mfcc=15, hop_length=1024)
                mfcc = mfcc[:, :1200]
                listm = [[round(float(c), 3) for c in l] for l in mfcc]

                with open(f'msd-data/mfcc-features/{track_id}.json', 'w') as f:
                    json.dump(listm, f)

                i += 1

    # Pause after every iteration, including failed ones, because each one has
    # already made at least one API request (presumably for rate limiting).
    time.sleep(0.95)