# Building my user profile
This notebook loads my clean Last.fm data and builds a user profile from the tags of my top artists, so that it can later be compared against new artists.

In [1]:
# import libraries
import pandas as pd
import requests
from math import sqrt
from collections import defaultdict
import pickle
from dotenv import load_dotenv
import os

In [2]:
# Read the cleaned listening data and give its two columns fixed names
# so that later cells can refer to them as "artist" and "scrobbles".
column_names = ["artist", "scrobbles"]
df = pd.read_csv("festivalrec_data.csv")
df.columns = column_names

In [3]:
# Number of leading rows kept for the profile (might want to keep more than this).
# NOTE(review): "top artists" presumably relies on the CSV already being sorted
# by scrobbles in descending order -- confirm against the cleaning notebook.
n_rows = 150
df = df.head(n_rows)

In [4]:
# Preview the first five rows to sanity-check the loaded data
df.head(5)

Unnamed: 0,artist,scrobbles
0,Teenage Fanclub,17226
1,The National,9347
2,Black Rebel Motorcycle Club,7107
3,Allo Darlin',6435
4,Travis,6365


In [5]:
# Load the .env file and read the API key from the environment,
# which keeps the secret out of the notebook itself.
load_dotenv()
API_KEY = os.getenv('API_KEY')
if not API_KEY:
    # Fail here with a clear message instead of on the first API request later on
    raise RuntimeError("API_KEY is not set; add it to your .env file or environment")

In [6]:
# Loop over artists to get their top tags from the Last.fm API.
# This issues one HTTP request per artist, so it takes a while!

# HTTPS so the API key is not sent in clear text
LASTFM_API_URL = 'https://ws.audioscrobbler.com/2.0/'
REQUEST_TIMEOUT = 10  # seconds; without a timeout a single stalled request hangs the loop

# Dictionary with artist names as keys and {'tags': [tag names]} as values.
# Every artist gets an entry (possibly with an empty list), because the
# profile-building step looks up each artist of the listening history in it.
library_artists_features = dict()
top_n = 21 # arbitrary number of tags
for artist in df["artist"]:
    # Reset for every artist *before* the request: otherwise a failed request
    # raises NameError on the first artist, or silently reuses the previous
    # artist's tags on later ones.
    these_tags = []

    # Passing the query as `params` lets requests URL-encode the artist name;
    # names containing '&', '#', '+', ... would otherwise corrupt the query string.
    params = {
        'method': 'artist.getTopTags',
        'artist': artist,
        'api_key': API_KEY,
        'format': 'json',
        'limit': 20,
    }
    try:
        response = requests.get(LASTFM_API_URL, params=params, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        artistdata = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # Only the exception type is printed: the exception message embeds the
        # request URL, which contains the API key.
        # ValueError covers invalid JSON on older versions of requests.
        print(f"Error fetching data for {artist}: {type(e).__name__}")
    else:
        try:
            for tagname in artistdata['toptags']['tag'][:top_n]:
                if tagname['name'] == 'seen live': # exclude 'seen live'
                    continue
                these_tags.append(tagname['name'])
        except (KeyError, TypeError):
            # Payload without the expected 'toptags' -> 'tag' -> 'name' structure
            # (e.g. an API error object); keep whatever tags were collected so far.
            print(f"Artist: {artist}")

    library_artists_features[artist] = {'tags': these_tags}



In [7]:
# Turn the two-column frame into a dict of artist -> number of scrobbles.
# Each row of the array is an (artist, scrobbles) pair, which dict() consumes
# as a key/value pair.
artist_scrobble_pairs = df.to_numpy()
listening_history = dict(artist_scrobble_pairs)

In [8]:
# user profile function
def construct_user_profile(library_artists_features, listening_history):
    """Build a tag-weight profile from a listening history.

    Each artist contributes its share of all listens
    (``listens / total_listens``) to every one of its tags, so a tag's value
    is the summed share of the artists that carry that tag.

    Args:
        library_artists_features: dict mapping artist name to a dict holding
            a ``'tags'`` list of tag names.
        listening_history: dict mapping artist name to its number of listens.

    Returns:
        defaultdict(float) mapping tag name to its weight. It is empty when
        the history is empty or its listens sum to zero.
    """
    profile = defaultdict(float)
    total_listens = sum(listening_history.values())
    if not total_listens:
        # Nothing to normalise by; avoids dividing by zero
        return profile

    for artist, listens in listening_history.items():
        features = library_artists_features.get(artist)
        if not features:
            # Artist without fetched features: it still counts towards
            # total_listens but cannot contribute to any tag.
            continue
        share = listens / total_listens  # same for every tag of this artist
        for tag in features.get('tags', ()):
            profile[tag] += share

    return profile

In [9]:
# Build the profile from the fetched tags and the scrobble counts.
# The bare name on the last line makes the notebook display the resulting
# dict of tag: value weighted by scrobbles.
user_profile = construct_user_profile(
    library_artists_features=library_artists_features,
    listening_history=listening_history,
)
user_profile

defaultdict(float,
            {'power pop': 0.24418154208503778,
             'indie': 0.9877897607536879,
             'Scottish': 0.1778309780587579,
             'indie rock': 0.9545292549894632,
             'alternative': 0.8926606855088632,
             'rock': 0.8834758894260565,
             'indie pop': 0.802896832775505,
             'britpop': 0.4231746622040411,
             'british': 0.5049584727903804,
             'pop': 0.7775660096690218,
             'alternative rock': 0.6867678505020451,
             '90s': 0.320906160902442,
             'scotland': 0.1686229391347465,
             'glasgow': 0.14054946076608404,
             'jangle pop': 0.1745188731870584,
             'UK': 0.4221016177017478,
             'shoegaze': 0.24338353787033593,
             'powerpop': 0.13054729143423824,
             'male vocalists': 0.3483985682409819,
             'guitar pop': 0.07589562414776249,
             'post-punk': 0.19733946944341146,
             'american': 0.30597

In [10]:
# Save the user profile as a pickle file.
# The output directory can be overridden with the FESTIVAL_MATCHMAKER_DIR
# environment variable; it defaults to the original hard-coded project path,
# so existing setups keep writing to the same place.
target_directory = os.getenv(
    'FESTIVAL_MATCHMAKER_DIR',
    '/Users/carolinashimabukuro/projects/festival-matchmaker/',
)
# NOTE: pickle files should only ever be loaded back from trusted sources.
with open(os.path.join(target_directory, 'saved_user_profile.pkl'), 'wb') as f:
    pickle.dump(user_profile, f)
