In [1]:
import json
import os
from collections import Counter

import pandas as pd
import requests

# Spotify app credentials. They are read from the environment so secrets are
# never saved into (or committed with) the notebook; each falls back to ""
# when its variable is unset.
CLIENT_ID = os.environ.get("SPOTIFY_CLIENT_ID", "")
CLIENT_SECRET = os.environ.get("SPOTIFY_CLIENT_SECRET", "")

# Spotify user id under which the playlist is created.
SPOTIFY_USER_ID = os.environ.get("SPOTIFY_USER_ID", "")

# Get a bearer token. It expires after 1 hour

In [2]:
# Exchange the app's client id/secret for a bearer token
# (grant_type=client_credentials).
auth_url = "https://accounts.spotify.com/api/token"
auth_params = {"grant_type": "client_credentials"}

response = requests.post(auth_url, auth=(CLIENT_ID, CLIENT_SECRET), data=auth_params)
# Fail here with the HTTP error instead of a KeyError on "access_token" below
# when the credentials are missing or rejected.
response.raise_for_status()

response_data = response.json()
ACCESS_TOKEN = response_data["access_token"]

In [3]:
# The lineup text was copied using macOS Finder's character recognition and
# saved with one artist per line.

with open("data/raw_artists.txt", "r", encoding="utf-8") as file:
    data = file.read()

# Strip surrounding whitespace and drop blank or whitespace-only lines
# (including the trailing blank line) so an empty name is never searched.
infrasound_lineup = [
    artist.strip()
    for artist in data.splitlines()
    if artist.strip()
]

In [4]:
def artist_searches(raw_artist_name):
    """
    Search Spotify's API for artists matching the raw artist name.

    The name is passed as a query parameter so that requests URL-encodes it;
    names containing characters such as "&" or "#" are therefore searched
    verbatim instead of corrupting the query string.

    Args:
        raw_artist_name: Artist name as it appears in the lineup.

    Returns:
        If a result's name equals raw_artist_name (case-insensitive), a
        single-entry dict {raw_artist_name.lower(): artist} holding the first
        such result. Otherwise a dict of every returned artist keyed by its
        lower-cased Spotify name, in the order Spotify returned them (a later
        duplicate name replaces an earlier one; the dict may be empty).

        Each artist is a dict with "name", "raw_name", "id" and "genres" keys.

    Raises:
        requests.HTTPError: If Spotify answers with an error status.
    """
    response = requests.get(
        "https://api.spotify.com/v1/search",
        headers={"Authorization": f"Bearer {ACCESS_TOKEN}"},
        # Let requests encode the name rather than splicing it into the URL.
        params={"q": raw_artist_name, "type": "artist"},
    )
    # Surface HTTP failures here instead of a KeyError on "artists" below.
    response.raise_for_status()
    data = response.json()

    wanted_name = raw_artist_name.lower()
    artist_records = {}
    for record in data["artists"]["items"]:
        artist = {
            "name": record["name"],
            "raw_name": raw_artist_name,
            "id": record["id"],
            "genres": record["genres"],
        }

        # First exact (case-insensitive) name match wins; stop looking.
        if artist["name"].lower() == wanted_name:
            return {wanted_name: artist}

        artist_records[artist["name"].lower()] = artist
    return artist_records

In [5]:
# Resolve every lineup name to one Spotify artist record.
artist_records = []
# Lineup names without an exact match, mapped to the candidates Spotify
# returned, so they can be reviewed by hand.
non_match_records = {}
for raw_artist_name in infrasound_lineup:
    results = artist_searches(raw_artist_name)
    exact_match = results.get(raw_artist_name.lower())
    if exact_match is not None:
        artist_records.append(exact_match)
        continue

    # Snapshot the candidates as a list rather than keeping a dict view.
    candidates = list(results.values())
    non_match_records[raw_artist_name] = candidates
    if candidates:
        # Fall back to the first record Spotify returned.
        artist_records.append(candidates[0])
    # An empty search result adds no row instead of raising IndexError; the
    # name is still listed in non_match_records.

print(f"Completed with {len(non_match_records)=}")
df = pd.DataFrame(artist_records)
df.head()

Completed with len(non_match_records)=4


Unnamed: 0,name,raw_name,id,genres
0,Abstrakt Sonance,ABSTRAKT SONANCE,00qKBesewdWy5l0bpMdosp,"[deep dubstep, experimental bass]"
1,AES DANA,AES DANA,6n1fB5NgTsFNdT4JHpVMe1,"[ambient psychill, ambient trance, psychill]"
2,Alejo,ALEJO,50sIhX3HytFEwQXZJLUZQE,"[reggaeton, trap latino, urbano latino]"
3,American Grime,AMERICAN GRIME,3cyPRO15GSCgu9DlxtLJfR,[]
4,Anna Morgan,ANNA MORGAN,30X6dIzlcixPlRNNYesrA4,[]


# Anomaly/missing data analysis

## Originally there were 17 non-matching records
'ABSTRAKT SONACE', 'AXJA', 'C-MON& KYPSKI (DJ SET)', 'CONFIDENTCHILL', 'DE-TU', 'DIEBYTHESWORD', 'D]MADD', 'GRLLSMTH', 'J.ADJODHA', 'KODEg', 'KYPSKI (DJ SET)', 'MR CARMACK', 'MUXMOOL', 'NAUTICAL DEVINE', "SUMTHIN' SUMTHIN", 'THE AUTONYM', 'THEWIDDLER'

Modified the raw text file to fix things like spacing and incorrect characters.
Misspellings in the lineup itself (corrected to): Abstrakt Sonance, Nautical Divine


## Finishing with 4 non matches: 
- 'AXJA': Only has 1 song on Spotify. Outlier to my "first artist theory"
- 'CONFIDENTCHILL': Not on Spotify
- 'GRLLSMTH': KllSmth hasn't released under Grllsmth to my knowledge
- 'THE AUTONYM': Not on Spotify

# Genre analysis

## What are genres, can we eat them?

In [6]:
# Flatten the per-artist genre lists into one list; duplicates are kept so
# that they can be counted.
raw_genres = []
for artist_genres in df["genres"]:
    raw_genres.extend(artist_genres)
unique_genres = set(raw_genres)

# Ten most frequent genre tags across the lineup.
Counter(raw_genres).most_common(10)

[('glitch hop', 7),
 ('deep dubstep', 6),
 ('experimental bass', 6),
 ('downtempo bass', 6),
 ('classic dubstep', 5),
 ('wave', 4),
 ('psychill', 3),
 ('wonky', 3),
 ('ambient trance', 2),
 ('halftime dnb', 2)]

In [7]:
# Export the artist table (without the index column) so other folks can use it 🙂
df.to_csv(path_or_buf="data/infrasound_artists_and_spotify_data.csv", index=False)

# Grab the top 10 tracks per artist. Save each as a tuple of track URI and name

In [8]:
# (track uri, track name) tuples collected across all artists.
tracks = []

# The auth header is identical for every request, so build it once.
headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}

for artist_id in df.id:
    url = f"https://api.spotify.com/v1/artists/{artist_id}/top-tracks?market=ES"
    response = requests.get(url, headers=headers)
    # Surface HTTP failures here instead of a KeyError on "tracks" below.
    response.raise_for_status()
    tracks.extend(
        (track["uri"], track["name"]) for track in response.json()["tracks"]
    )

# Create a playlist consisting of top 10 tracks for each artist

In [9]:
# Got too lazy to figure out scopes, so the token used for the playlist calls
# was grabbed from https://developer.spotify.com/ 🤦🏻‍♂️
# It is read from the environment so it is never saved into the notebook;
# falls back to "" when SPOTIFY_USER_ACCESS_TOKEN is unset.

OTHER_ACCESS_TOKEN = os.environ.get("SPOTIFY_USER_ACCESS_TOKEN", "")

In [10]:
# Create the (public) playlist under SPOTIFY_USER_ID.
url = f"https://api.spotify.com/v1/users/{SPOTIFY_USER_ID}/playlists"
# Uses the separately obtained token, not the client-credentials ACCESS_TOKEN.
headers = {
    "Authorization": f"Bearer {OTHER_ACCESS_TOKEN}",
    "Content-Type": "application/json",
}
response = requests.post(
    url,
    headers=headers,
    data=json.dumps(
        {
            "name": "Infrasound 2023",
            "description": "Made with love by Andrew Loutfi and ChatGPT via Spotify's Web API",
            "public": True,
        }
    ),
)
# Fail with the HTTP error instead of a KeyError on "id" when the request is
# rejected (e.g. missing or expired token).
response.raise_for_status()

# Parse the response body once and read both fields from it.
playlist = response.json()
playlist_id = playlist["id"]
href = playlist["href"]
print(playlist_id, href)

1xCc4TruODjgZLRl2mR5Kz https://api.spotify.com/v1/playlists/1xCc4TruODjgZLRl2mR5Kz


# Add tracks to playlist. 
Spotify will only let you add a max of 100 tracks per call. Segment accordingly.

In [11]:
# Number of tracks sent per request.
segment_size = 100

# The endpoint is the same for every batch, so build it once.
url = f"https://api.spotify.com/v1/playlists/{playlist_id}/tracks"

for i in range(0, len(tracks), segment_size):
    segment = tracks[i : i + segment_size]

    add_songs = requests.post(
        url,
        headers=headers,
        # Each entry in `tracks` is (uri, name); only the uri is sent.
        data=json.dumps({"uris": [uri for uri, _name in segment]}),
    )
    print(add_songs)
    # Stop on a failed batch rather than silently leaving a gap in the playlist.
    add_songs.raise_for_status()

<Response [201]>
<Response [201]>
<Response [201]>
<Response [201]>
<Response [201]>
<Response [201]>
<Response [201]>
<Response [201]>
