In [1]:
import pandas as pd
import spotipy
from collections import Counter, defaultdict
import json
import re
import time
from spotipy.exceptions import SpotifyException

Reading all the data via files created from collection stage

In [2]:
with open('token.txt', 'r') as token_file:
    access_token = token_file.read().strip()

sp = spotipy.Spotify(auth=access_token)
user_info = sp.current_user()
username = user_info['id']
# username = "alexchavezjr22"

In [3]:
csv_name = f"music_data_{username}.csv"
df = pd.read_csv(csv_name)

In [4]:
json_name = f"genres_{username}.json"
with open(json_name,'r') as json_file:
    artist_genres = json.load(json_file)

Reviewing data to ensure smooth access

In [5]:
df.head() 

Unnamed: 0,artist_name,artist_id,first_added,first_song,first_album,first_album_type,last_added,last_song,last_album,last_album_type,main_songs_count,featured_songs_count,liked_songs_count,genres_count
0,Sleepy Hallow,6EPlBSH2RSiettczlz7ihV,2020-05-12 06:10:53+00:00,Wet Em Up Pt. 2,State of Emergency,album,2024-08-30 04:27:09+00:00,Nostalgia (feat. Gray Hawken),Nostalgia (feat. Gray Hawken),single,36,7,43,0
1,Gray Hawken,6qFnkXKWazlAU0OPb4Q4a1,2024-08-30 04:27:09+00:00,Nostalgia (feat. Gray Hawken),Nostalgia (feat. Gray Hawken),single,2024-08-30 04:27:09+00:00,Nostalgia (feat. Gray Hawken),Nostalgia (feat. Gray Hawken),single,0,1,1,0
2,Strick,2WInxWtfee6mRhUQUmIRAH,2020-07-31 04:01:36+00:00,Yacht Club (feat. Young Thug & Ty Dolla $ign),Yacht Club (feat. Young Thug & Ty Dolla $ign),single,2024-08-30 04:27:07+00:00,CR@SH (feat. Travis Scott),ALL TIME HIGH,album,4,3,7,0
3,Travis Scott,0Y5tJX1MQlPlqiwlOH1tJY,2017-06-09 10:46:39+00:00,Know No Better,Know No Better,single,2024-08-30 04:27:07+00:00,CR@SH (feat. Travis Scott),ALL TIME HIGH,album,90,125,215,0
4,Tasha,4ZbFpxbORhzU78ve8e62Ej,2024-08-30 04:27:05+00:00,Love's Changing,Love's Changing,single,2024-08-30 04:27:05+00:00,Love's Changing,Love's Changing,single,1,0,1,0


In [6]:
len(artist_genres.keys())

1002

In [7]:
strings_columns = ['artist_name','artist_id','first_song','first_album','first_album_type', 'last_song','last_album','last_album_type']
datetime_columns = ['first_added','last_added']

for string_col in strings_columns:
    df[string_col] = df[string_col].astype(str)

for datetime_col in datetime_columns:
    df[datetime_col] = pd.to_datetime(df[datetime_col])

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3796 entries, 0 to 3795
Data columns (total 14 columns):
 #   Column                Non-Null Count  Dtype              
---  ------                --------------  -----              
 0   artist_name           3796 non-null   object             
 1   artist_id             3796 non-null   object             
 2   first_added           3796 non-null   datetime64[ns, UTC]
 3   first_song            3796 non-null   object             
 4   first_album           3796 non-null   object             
 5   first_album_type      3796 non-null   object             
 6   last_added            3796 non-null   datetime64[ns, UTC]
 7   last_song             3796 non-null   object             
 8   last_album            3796 non-null   object             
 9   last_album_type       3796 non-null   object             
 10  main_songs_count      3796 non-null   int64              
 11  featured_songs_count  3796 non-null   int64              
 12  liked_

In [9]:
df.isnull().values.ravel().sum()

np.int64(0)

Function to find common trends between the user's genres and the common words among the genres

In [10]:
def find_most_common_words(strings_list):
    # Concatenate strings into a single string
    full_text = ' '.join(strings_list)

    # Use regex to tokenize the string into words
    words = re.findall(r'\b[\w&-]+\b', full_text.lower())  # Case insensitive

    # Count occurrences of each word
    word_counts = Counter(words)

    # Find the most common words
    most_common_words = word_counts.most_common(1000)  # Adjust the number as needed

    return most_common_words

In [12]:
def find_word_occurrences(strings_list, keyword):
    # Initialize an empty list to store the occurrences
    occurrences = []

    # Iterate over the strings in the list
    for s in strings_list:
        # Use regex to tokenize the string into words
        words = re.findall(r'\b[\w&-]+\b', s.lower())  # Case insensitive

        # Check if the keyword is present in the words
        if keyword in words:
            # If found, append the entire string to the list
            occurrences.append(s)

    return occurrences

In [13]:
all_genres = list(artist_genres.keys())
common_words = find_most_common_words(all_genres)
common_words

[('pop', 122),
 ('hip', 104),
 ('hop', 104),
 ('indie', 79),
 ('rap', 66),
 ('rock', 55),
 ('house', 34),
 ('trap', 29),
 ('r&b', 26),
 ('alternative', 21),
 ('drill', 18),
 ('modern', 18),
 ('jazz', 18),
 ('country', 17),
 ('soul', 16),
 ('canadian', 16),
 ('uk', 15),
 ('funk', 14),
 ('musica', 14),
 ('singer-songwriter', 13),
 ('australian', 13),
 ('new', 12),
 ('edm', 11),
 ('dance', 11),
 ('folk', 11),
 ('latin', 10),
 ('contemporary', 10),
 ('punk', 10),
 ('classic', 10),
 ('deep', 10),
 ('christian', 10),
 ('metal', 9),
 ('reggae', 8),
 ('swedish', 8),
 ('blues', 8),
 ('experimental', 7),
 ('underground', 7),
 ('south', 7),
 ('espanol', 7),
 ('emo', 7),
 ('indonesian', 7),
 ('francais', 7),
 ('bass', 7),
 ('korean', 7),
 ('french', 7),
 ('chill', 6),
 ('reggaeton', 6),
 ('melodic', 6),
 ('jersey', 6),
 ('argentino', 6),
 ('chileno', 6),
 ('mexicano', 6),
 ('dutch', 6),
 ('lo-fi', 6),
 ('music', 6),
 ('old', 6),
 ('school', 6),
 ('chicago', 5),
 ('viral', 5),
 ('urbano', 5),
 ('ga

In [14]:
common_word_to_genres = {}
for tuple in common_words:
    genre = tuple[0]
    results = find_word_occurrences(all_genres, genre)
    common_word_to_genres[genre] = results

In [15]:
common_word_to_genres

{'pop': ['chill pop',
  'singer-songwriter pop',
  'social media pop',
  'teen pop',
  'indie pop',
  'funk pop',
  'pop',
  'pop nacional',
  'latin pop',
  'latin viral pop',
  'modern indie pop',
  'pop dance',
  'nigerian pop',
  'pop rap',
  'dance pop',
  'pop r&b',
  'nyc pop',
  'canadian pop',
  'pop punk',
  'socal pop punk',
  'colombian pop',
  'pop reggaeton',
  'art pop',
  'candy pop',
  'uk pop',
  'pop argentino',
  'hypnagogic pop',
  'bedroom pop',
  'viral pop',
  'hip pop',
  'shiver pop',
  'modern country pop',
  'puerto rican pop',
  'post-teen pop',
  'pop venezolano',
  'noise pop',
  'dream pop',
  'panamanian pop',
  'south african pop',
  'south african pop dance',
  'la pop',
  'psychedelic pop',
  'pop peruano',
  'spanish pop',
  'jazz pop',
  'modern alternative pop',
  'australian pop',
  'barbadian pop',
  'sudanese pop',
  'indie pop rap',
  'pop emo',
  'experimental pop',
  'icelandic pop',
  'danish pop',
  'latin arena pop',
  'mexican pop',
  'p

Special Case for Hip Hop 
Upon review, hip and hop showed up as two different words and they often were the same word instances. So this section helps determine the hip-hop analomy

In [16]:
hip_hop_condition_met = True
if 'hip' not in common_word_to_genres or 'hop' not in common_word_to_genres:
    print("Either 'hip' or 'hop' (or both) don't exist.")
    hip_hop_condition_met = False
else:
    print("Either 'hip' or 'hop' (or both) exists in this list of genres")

Either 'hip' or 'hop' (or both) exists in this list of genres


In [17]:
if hip_hop_condition_met == True:
    set1 = set(common_word_to_genres['hip'])
    set2 = set(common_word_to_genres['hop'])

    if set1 == set2:
        print("Hip and Hop lists are the same!")
    else:
        print("Hip and Hop Lists are not the same!")
        
        # Find elements that are in set1 but not in set2
        hip_set1 = set1 - set2

        # Find elements that are in set2 but not in set1
        hop_set2 = set2 - set1

        print("Elements in Hip but not in Hop:", hip_set1)
        print("Elements in Hop but not in Hip:", hop_set2)

        hip_hop_subgenres_to_remove = hip_set1.union(hop_set2)
        hip_hop_list = set1.union(set2)
        hip_hop_list.difference_update(hip_hop_subgenres_to_remove)
        hip_hop_list = list(hip_hop_list)
        hip_list = list(hip_set1)
        hop_list = list(hop_set2)

        updated_word_to_genres = common_word_to_genres.copy()
        updated_word_to_genres['hip'] = hip_list
        updated_word_to_genres['hop'] = hop_list
        updated_word_to_genres['hip hop'] = hip_hop_list
        common_word_to_genres = dict(sorted(updated_word_to_genres.items()))


Hip and Hop Lists are not the same!
Elements in Hip but not in Hop: {'hip house', 'hip pop'}
Elements in Hop but not in Hip: {'trip hop', 'glitch hop'}


In [18]:
common_word_to_genres

{'150': ['funk 150 bpm'],
 '420': ['cumbia 420'],
 'abstract': ['chill abstract hip hop',
  'abstract beats',
  'abstract',
  'abstract hip hop'],
 'acid': ['acid house'],
 'acoustic': ['acoustic rock', 'acoustic pop', 'deep acoustic pop'],
 'adult': ['italian adult pop'],
 'aesthetic': ['aesthetic rap'],
 'african': ['south african pop',
  'south african pop dance',
  'south african house',
  'south african r&b',
  'south african hip hop'],
 'afro': ['afro r&b', 'afro soul', 'afro house angolano'],
 'afrobeat': ['christian afrobeat'],
 'afrobeats': ['afrobeats'],
 'afrofuturism': ['afrofuturism'],
 'afrofuturismo': ['afrofuturismo brasileiro'],
 'afroperuana': ['musica afroperuana'],
 'afropop': ['afropop', 'viral afropop'],
 'afroswing': ['afroswing'],
 'age': ['golden age hip hop'],
 'ai': ['ai'],
 'alabama': ['alabama rap', 'alabama indie'],
 'alberta': ['alberta hip hop', 'alberta country'],
 'album': ['album rock'],
 'algerien': ['rap algerien'],
 'alt': ['alt z'],
 'alte': ['alt

In [19]:
final_word_to_genres = {key: value for key, value in common_word_to_genres.items() if not key.isdigit()}

In [20]:
len(final_word_to_genres.keys())

697

Associating each artist and genre from our final list of common words

In [21]:
artist_map = {}
for word, genres in final_word_to_genres.items():
    artists_mapping = {}  # Dictionary to store artists and their genres
    for genre in genres:
        if genre in artist_genres:
            for artist in artist_genres[genre]:
                # Check if the artist is already in the mapping
                if artist in artists_mapping:
                    artists_mapping[artist].append(genre)
                else:
                    artists_mapping[artist] = [genre]

    artist_map[word] = artists_mapping


In [22]:
for word, artists in artist_map.items():
    for artist, genres in artists.items():
        # Look up the artist in the DataFrame
        artist_info = df[df['artist_name'] == artist]

        # Check if the artist is found in the DataFrame
        if not artist_info.empty:
            artist_map[word][artist] = {'genres': genres}

In [23]:
artist_map

{'abstract': {'Tommy Richman': {'genres': ['chill abstract hip hop']},
  'redveil': {'genres': ['chill abstract hip hop']},
  'MIKE': {'genres': ['chill abstract hip hop']},
  'MAVI': {'genres': ['chill abstract hip hop']},
  'MARCO PLUS': {'genres': ['chill abstract hip hop']},
  'Overpade': {'genres': ['chill abstract hip hop']},
  'Zelooperz': {'genres': ['chill abstract hip hop']},
  'Sideshow': {'genres': ['chill abstract hip hop']},
  'Navy Blue': {'genres': ['chill abstract hip hop']},
  'Knxwledge': {'genres': ['abstract beats']},
  'Wun Two': {'genres': ['abstract beats']},
  'Karriem Riggins': {'genres': ['abstract beats']},
  'Tuamie': {'genres': ['abstract beats']},
  'D33J': {'genres': ['abstract beats']},
  'Brian Eno': {'genres': ['abstract']},
  'Madvillain': {'genres': ['abstract hip hop']},
  'Madlib': {'genres': ['abstract hip hop']},
  'Viktor Vaughn': {'genres': ['abstract hip hop']},
  'Murs': {'genres': ['abstract hip hop']},
  'Son Lux': {'genres': ['abstract hi

Analysis for one song artists

In [24]:
years = sorted(df['first_added'].dt.year.unique())
print(years)

[np.int32(2016), np.int32(2017), np.int32(2018), np.int32(2019), np.int32(2020), np.int32(2021), np.int32(2022), np.int32(2023), np.int32(2024)]


In [25]:
one_song_liked = df[df['liked_songs_count'] == 1]
one_song_liked.head()

Unnamed: 0,artist_name,artist_id,first_added,first_song,first_album,first_album_type,last_added,last_song,last_album,last_album_type,main_songs_count,featured_songs_count,liked_songs_count,genres_count
1,Gray Hawken,6qFnkXKWazlAU0OPb4Q4a1,2024-08-30 04:27:09+00:00,Nostalgia (feat. Gray Hawken),Nostalgia (feat. Gray Hawken),single,2024-08-30 04:27:09+00:00,Nostalgia (feat. Gray Hawken),Nostalgia (feat. Gray Hawken),single,0,1,1,0
4,Tasha,4ZbFpxbORhzU78ve8e62Ej,2024-08-30 04:27:05+00:00,Love's Changing,Love's Changing,single,2024-08-30 04:27:05+00:00,Love's Changing,Love's Changing,single,1,0,1,0
6,Daniel Seavey,21z8to3YxZXgKYJpBB54P2,2024-08-30 04:26:54+00:00,Other People,Other People,single,2024-08-30 04:26:54+00:00,Other People,Other People,single,1,0,1,0
7,Hank Heaven,6bsNV1qaLfpRFLI2eWIHkf,2024-08-30 04:26:50+00:00,Beloved (feat. Beach Bunny),Beloved (feat. Beach Bunny),single,2024-08-30 04:26:50+00:00,Beloved (feat. Beach Bunny),Beloved (feat. Beach Bunny),single,1,0,1,0
9,Flawed Mangoes,4MrQDA45Gd0llLrwFUzimG,2024-08-30 04:26:46+00:00,Leave A Message (w/ aldn),Leave A Message (w/ aldn),single,2024-08-30 04:26:46+00:00,Leave A Message (w/ aldn),Leave A Message (w/ aldn),single,1,0,1,0


In [26]:
for year in years:
    print(f"{year} : {len(one_song_liked[one_song_liked['first_added'].dt.year == year])}")

2016 : 7
2017 : 17
2018 : 13
2019 : 310
2020 : 474
2021 : 521
2022 : 357
2023 : 216
2024 : 61


In [27]:
def search_genres(genres_map, artist_name):
    all_genres = set()
    for genre, list_of_artists in genres_map.items():
        if artist_name in list_of_artists:
            all_genres.add(genre)
    return all_genres

In [28]:
df[df['artist_name'] == 'Juice WRLD']

Unnamed: 0,artist_name,artist_id,first_added,first_song,first_album,first_album_type,last_added,last_song,last_album,last_album_type,main_songs_count,featured_songs_count,liked_songs_count,genres_count
183,Juice WRLD,4MCBfE4596Uoi2O4DtmEMz,2018-07-20 18:58:32+00:00,All Girls Are The Same,Goodbye & Good Riddance,album,2024-06-07 15:34:30+00:00,Wake Up! (feat. Juice WRLD),11th Dimension,album,229,65,294,0


In [29]:
json_name = "juice_wrld_saved.json"
with open(json_name,'r') as json_file:
   juice_wrld_saved = json.load(json_file)

In [30]:
juice_wrld_saved

{}

In [29]:
# one_song_artists = [name for name in one_song_liked['artist_name']]
# artists_to_remove = []

# for artist in one_song_artists:

#     spotify_id = one_song_liked[one_song_liked['artist_name'] == artist].iloc[0]['artist_id']
#     related_artists = sp.artist_related_artists(spotify_id)['artists']
#     related_artists_status = []

#     for related_artist in related_artists:
#         name = related_artist['name']
#         matching_rows = df[df['artist_name'] == name]
        
#         if not matching_rows.empty:
#             songs = matching_rows['liked_songs_count']
#             first_added = matching_rows['first_added'].min()
#             last_added = matching_rows['last_added'].max()

#             if (songs > 1).any() and (last_added - first_added).days > 0:
#                 related_artists_status.append(name)
        
#     if not related_artists_status:
#         artists_to_remove.append(artist)

In [30]:
len(artists_to_remove)

NameError: name 'artists_to_remove' is not defined

In [None]:
df = df[~df['artist_name'].isin(artists_to_remove)].reset_index(drop=True)
one_song_liked = one_song_liked[~one_song_liked['artist_name'].isin(artists_to_remove)].reset_index(drop=True)

In [None]:
one_song_liked

In [None]:
one_song_median_genre_val = one_song_liked['genres_count'].median()
one_song_artists_below_median = one_song_liked[one_song_liked['genres_count'] < one_song_median_genre_val]
one_song_artists_below_median = one_song_artists_below_median[one_song_artists_below_median['genres_count'] != 0]
one_song_artists_above_median = one_song_liked[one_song_liked['genres_count'] >= one_song_median_genre_val]

In [None]:
len(one_song_artists_below_median)

In [None]:
len(one_song_artists_above_median)

In [None]:
one_song_artists_above_median.head()

In [None]:
one_song_and_one_genre_artists = one_song_artists_above_median[one_song_artists_above_median['genres_count'] == 1]
len(one_song_and_one_genre_artists)

In [None]:
one_song_and_one_genre_artists.loc[:,'genre'] = one_song_and_one_genre_artists['artist_name'].apply(lambda name : search_genres(artist_genres, name))
one_song_and_one_genre_artists.loc[:,'only_artist_in_genre'] = one_song_and_one_genre_artists.apply(lambda genre_set: len(genre_set) == 1 and list(genre_set)[0] in artist_genres and len(artist_genres[list(genre_set)[0]]) == 1)
one_song_and_one_genre_artists_true_results = one_song_and_one_genre_artists[one_song_and_one_genre_artists['only_artist_in_genre'] == True].reset_index(drop=True)
one_song_and_one_genre_artists_true_results

In [None]:
# from sklearn.feature_extraction.text import TfidfVectorizer
# from sklearn.preprocessing import MultiLabelBinarizer
# from sklearn.model_selection import train_test_split, KFold
# from sklearn.neighbors import KNeighborsClassifier
# import matplotlib.pyplot as plt

# # Your existing code for creating features
# artist_genres_pairs = [(genre, artist) for genre, artists in artist_genres.items() for artist in artists]
# artist_genres_df = pd.DataFrame(artist_genres_pairs, columns=["Genre", "Artist"])

# mlb = MultiLabelBinarizer()
# genre_labels = mlb.fit_transform(artist_genres_df["Genre"])

# tfidf_vectorizer = TfidfVectorizer()
# artist_tfidf_features = tfidf_vectorizer.fit_transform(artist_genres_df["Artist"])

# # Convert the column names of TF-IDF features to strings
# tfidf_feature_names = tfidf_vectorizer.get_feature_names_out()
# tfidf_df = pd.DataFrame(artist_tfidf_features.toarray(), columns=tfidf_feature_names)

# # Combine genre labels and artist TF-IDF features into a single feature matrix
# features = pd.concat([tfidf_df, pd.DataFrame(genre_labels, columns=mlb.classes_)], axis=1)

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(features, artist_genres_df["Genre"], test_size=0.2, random_state=42)

# import numpy as np

# X_train = np.array(X_train)
# X_test = np.array(X_test)

# # Train the KNN model
# knn_model = KNeighborsClassifier(n_neighbors=5)
# knn_model.fit(X_train, y_train)

# # Evaluate the model
# accuracy = knn_model.score(X_test, y_test)
# print(f"Accuracy: {accuracy * 100}")

In [None]:
# cv_errors = []
# n_folds = 5
# max_neighbors = 100

# for k in range(1, max_neighbors):
#     kf = KFold(n_splits=n_folds)
#     fold_errors = np.zeros(n_folds)

#     for fold_num, (cv_train, cv_test) in enumerate(kf.split(X_train)):

#         x_train_response = X_train[cv_train]  
#         y_train_response = y_train.iloc[cv_train]

#         x_test_response = X_train[cv_test]  
#         y_test_response = y_train.iloc[cv_test]

#         cv_knn_clf = KNeighborsClassifier(n_neighbors=k)
#         cv_knn_clf.fit(x_train_response, y_train_response)

#         cv_pred = cv_knn_clf.predict(x_test_response)
#         fold_errors[fold_num] = np.sum(cv_pred == y_test_response) / len(cv_pred)
    
#     cv_errors += [np.mean(fold_errors)]

# plt.plot(np.arange(1, max_neighbors), cv_errors)
# plt.xlabel("Nearest Neighbors K")
# plt.ylabel("Cross-Validation Accuracy")
# plt.show()

# best_k = np.argmax(np.array(cv_errors)) + 1
# print("Best K Value for the K-Model is:", best_k)

In [None]:
# # Train the KNN model
# knn_model = KNeighborsClassifier(n_neighbors=best_k)
# knn_model.fit(X_train, y_train)

# # Evaluate the model
# accuracy = knn_model.score(X_test, y_test)
# print(f"Accuracy: {accuracy * 100}")

In [None]:
# import numpy as np
# from sklearn.feature_extraction.text import CountVectorizer
# from sklearn.cluster import AgglomerativeClustering
# from scipy.cluster.hierarchy import dendrogram, linkage
# import matplotlib.pyplot as plt

# # Create a list of genre-artist strings
# genre_artist_strings = [genre + ": " + ", ".join(artists) for genre, artists in artist_genres.items()]

# # Convert genre-artist strings to a bag-of-words representation
# vectorizer = CountVectorizer()
# X = vectorizer.fit_transform(genre_artist_strings)

# # Perform hierarchical clustering
# linkage_matrix = linkage(X.toarray(), method='ward')

# # Convert dict_keys to a list
# genre_labels = list(artist_genres.keys())

# # Plot the dendrogram
# dendrogram(linkage_matrix, labels=genre_labels, orientation='left', distance_sort='descending')
# plt.show()

# # Use AgglomerativeClustering to get cluster labels
# num_clusters = 3  # Adjust as needed
# clustering = AgglomerativeClustering(n_clusters=num_clusters, affinity='euclidean', linkage='ward')
# cluster_labels = clustering.fit_predict(X.toarray())

# # Print the cluster labels for each genre
# for genre, label in zip(genre_labels, cluster_labels):
#     print(f"{genre}: Cluster {label}")


In [None]:
# single_artist_genre_pairs = {}
# multiple_artist_genre_pairs = {}

# for year, artist_genre_pairs in one_song_genres_by_year.items():
#     for artist, genres_set in artist_genre_pairs.items():
#         for genre in genres_set:
            
#             list_of_artists = artist_genres[genre]

#             if artist in list_of_artists:
#                 if len(list_of_artists) == 1:
#                     # Artist is the only one in the genre
#                     single_artist_genre_pairs.setdefault(year, {}).setdefault(artist, set()).add(genre)

#                 else:
#                     # Artist is in multiple genres
#                     other_genres = search_genres(artist_genres, artist)

#                     if artist in single_artist_genre_pairs.get(year, {}):
#                         single_entries = single_artist_genre_pairs[year][artist]
#                         other_genres.difference_update(single_entries)

#                     multiple_artist_genre_pairs.setdefault(year, {}).update({artist: other_genres})

In [None]:
# single_artist_genre_pairs

In [None]:
# multiple_artist_genre_pairs

In [None]:
# # Find common artists and genres
# common_artists_genres = {}

# for year, artists_dict1 in one_song_genres_by_year.items():
#     if year in single_artist_genre_pairs:
#         common_entries = {}
#         for artist, genres1 in artists_dict1.items():
#             if artist in single_artist_genre_pairs[year]:
#                 genres2 = single_artist_genre_pairs[year][artist]
#                 if genres1 == genres2:
#                     common_entries[artist] = genres1

#         if common_entries:
#             common_artists_genres[year] = common_entries

# # Print the common artists and genres
# for year, artists_dict in common_artists_genres.items():
#     print(f"{year}: {artists_dict}")

In [None]:
# artists_to_remove

In [None]:
# len(artists_to_remove)

In [None]:
# # Find common artists and genres
# common_artists_genres = {}

# for year, artists_dict1 in one_song_genres_by_year.items():
#     if year in multiple_artist_genre_pairs:
#         common_entries = {}
#         for artist, genres1 in artists_dict1.items():
#             if artist in multiple_artist_genre_pairs[year]:
#                 genres2 = multiple_artist_genre_pairs[year][artist]
#                 if genres1 == genres2:
#                     common_entries[artist] = genres1

#         if common_entries:
#             common_artists_genres[year] = common_entries

# # Print the common artists and genres
# for year, artists_dict in common_artists_genres.items():
    # print(f"{year}: {artists_dict}")

In [None]:
# # Initialize a dictionary to store main genres and subgenres
# organized_genres = {}

# # Iterate over each genre in the original dictionary
# for genre, artists in artist_genres.items():
#     # Split the genre into main genre and subgenre (if exists)
#     main_genre, *subgenres = genre.split()

#     # Create a dictionary to store artists for the current genre
#     genre_data = {'artists': artists}

#     # Check if the main genre already exists in the organized dictionary
#     if main_genre not in organized_genres:
#         organized_genres[main_genre] = {}

#     # If there are subgenres, nest them inside the main genre
#     current_dict = organized_genres[main_genre]
#     for subgenre in subgenres:
#         # Check if the subgenre already exists
#         if subgenre not in current_dict:
#             current_dict[subgenre] = {}

#         # Move to the next level in the hierarchy
#         current_dict = current_dict[subgenre]

#     # Store the genre data at the deepest level
#     current_dict.update(genre_data)

In [None]:
#organized_genres