In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the three source tables (artists, artist-song links, songs) from the
# working directory, in that order.
df_artist, df_sing, df_song = map(
    pd.read_csv,
    ('./artist.csv', './sing.csv', './song.csv'),
)

In [None]:
# Preview only: df_artist is not used in any later step of this notebook.
df_artist.head(n=3)

Unnamed: 0,id,name,country,followers
0,60,The Weeknd,CAN,67886848
1,53,Dua Lipa,UK,31157517
2,22,Justin Bieber,CAN,36398525


In [None]:
# Preview the link table: each row pairs a song_id with an artist_id.
df_sing.head(n=3)

Unnamed: 0,song_id,artist_id
0,53,60
1,74,53
2,11,22


In [None]:
# Preview the song table; only its `id` and `genre` columns are used below.
df_song.head(n=3)

Unnamed: 0,id,title,genre,duration,streams,year
0,53,Blinding Lights,Pop,202,950000000,2024
1,74,Levitating,Pop,203,930000000,2024
2,11,Peaches,Pop,198,910000000,2024


In [None]:
# Attach each song's genre to the artist-song link table by matching
# df_sing.song_id against df_song.id.
#
# A left join keeps every row of the left frame (df_sing). Columns from the
# right frame (df_song) are filled in only where a match exists; rows without
# a match get NaN in those columns.
df_genre = pd.merge(
    df_sing,
    df_song[['id', 'genre']],
    how='left',
    left_on='song_id',
    right_on='id',
)

df_genre.head(3)

Unnamed: 0,song_id,artist_id,id,genre
0,53,60,53,Pop
1,74,53,74,Pop
2,11,22,11,Pop


In [None]:
# Keep one row per distinct (artist_id, genre_name) pair.
#
# The rename happens BEFORE the column selection so that this cell can be
# re-run safely: df_genre is overwritten here, and on a second run the 'genre'
# column no longer exists. DataFrame.rename ignores missing labels by default,
# whereas selecting ['artist_id', 'genre'] first would raise a KeyError.
df_genre = (
    df_genre
    .rename(columns={'genre': 'genre_name'})
    [['artist_id', 'genre_name']]
    .drop_duplicates()
)

df_genre.head(3)

Unnamed: 0,artist_id,genre_name
0,60,Pop
1,53,Pop
2,22,Pop


In [None]:
# Count how many distinct genres are associated with each artist.
#
# nunique() counts distinct non-null genre_name values per artist_id, so the
# result does not depend on df_genre having been de-duplicated beforehand and
# a missing genre (NaN from the left join) is not counted as a genre.
# Note: despite its name, at this point the Series holds the count for every
# artist; it is narrowed to artists with more than one genre afterwards.
artists_with_multiple_genres = df_genre.groupby('artist_id')['genre_name'].nunique()

print(artists_with_multiple_genres)

artist_id
1     1
2     1
3     1
4     1
5     1
     ..
75    1
76    1
77    1
78    1
79    2
Length: 79, dtype: int64


In [None]:
# Keep only the artists whose genre count is greater than 1.
artists_with_multiple_genres = artists_with_multiple_genres.loc[
    lambda genre_counts: genre_counts > 1
]

print(artists_with_multiple_genres)

artist_id
18    2
19    2
23    2
34    2
42    2
51    2
53    2
62    3
66    2
70    2
79    2
dtype: int64


In [None]:
# Persist the artist/genre pairs to genre.csv without writing the row index.
df_genre.to_csv(
    path_or_buf='genre.csv',
    index=False,
)