## EDA

In [2]:
import pandas as pd
import numpy as np
from copy import deepcopy

In [4]:
# Read both inputs; the first CSV column becomes the row index of each frame.
artists_path = './artists(in).csv'
conflicts_path = './conflicts(in).csv'
artists = pd.read_csv(artists_path, index_col=0)
conflicts = pd.read_csv(conflicts_path, index_col=0)

In [46]:
# Preview the first rows of the artists table.
artists.head()

Unnamed: 0,name,genre,popularity
0,Midnight Echo,Rock,75
1,Solar Flare,Electronic,78
2,Velvet Pulse,Jazz,35
3,Neon Reverie,Electronic,100
4,The Silver Owls,Classical,85


In [52]:
# Table dimensions as (rows, columns).
artists.shape #35 artists, each described by 3 columns

(35, 3)

In [60]:
# Count the distinct genres present in the artists table.
artists['genre'].nunique() #6 different genres

6

In [62]:
# Range of the popularity score across all artists.
popularity = artists['popularity']
print(popularity.min()) #the least popular artist has a popularity score of 20
print(popularity.max()) #the most popular artist has a popularity score of 100

20
100


In [48]:
# Preview the conflict matrix: rows and columns are both labelled by artist name.
conflicts.head()

Unnamed: 0,Midnight Echo,Solar Flare,Velvet Pulse,Neon Reverie,The Silver Owls,Echo Chamber,Aurora Skies,Static Mirage,Crimson Harmony,Deep Resonance,...,Rhythm Alchemy,Cloud Nine Collective,Hypnotic Echoes,The Polyrhythm Syndicate,Harmonic Dissonance,Turbo Vortex,The Jazz Nomads,The Bassline Architects,Cosmic Frequency,Parallel Dimension
Midnight Echo,0.0,0.0,0.0,0.2,0.5,0.0,0.8,1.0,0.2,0.5,...,0.2,0.8,1.0,1.0,0.65,1.0,0.4,0.4,1.0,0.2
Solar Flare,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.65,...,0.2,0.65,0.0,0.65,0.4,0.4,0.4,0.0,0.0,1.0
Velvet Pulse,0.0,0.0,0.0,1.0,0.5,0.0,0.65,1.0,0.5,1.0,...,1.0,1.0,0.4,1.0,0.7,0.0,1.0,0.15,1.0,0.4
Neon Reverie,0.2,1.0,1.0,0.0,0.2,0.9,0.2,1.0,0.0,1.0,...,0.0,0.0,0.0,0.2,0.65,0.65,0.2,0.0,0.2,1.0
The Silver Owls,0.5,0.0,0.5,0.2,0.0,1.0,0.0,1.0,0.9,0.0,...,1.0,0.65,0.0,0.2,0.9,0.2,0.2,0.4,1.0,0.0


In [54]:
# Square matrix: one row and one column per artist.
conflicts.shape #35 x 35, i.e. 35 artists

(35, 35)

##### Diversity

In [8]:
#check whether the artists are concentrated in one genre or evenly spread across genres
genre_counts = artists['genre'].value_counts()
print(genre_counts)

genre
Rock          10
Electronic     7
Jazz           7
Classical      5
Pop            3
Hip-Hop        3
Name: count, dtype: int64


##### Popularity

In [26]:
#list the distinct popularity scores, highest first
distinct_scores = artists['popularity'].unique().tolist()
distinct_scores.sort(reverse=True)
print(distinct_scores)

[100, 99, 98, 97, 96, 95, 94, 90, 88, 85, 84, 78, 77, 75, 72, 69, 66, 64, 61, 58, 53, 51, 47, 35, 20]


In [30]:
#show the artists whose popularity score is above 90
print("Most popular artists:")
is_very_popular = artists['popularity'] > 90
print(artists.loc[is_very_popular])
# 1- Neon Reverie- Electronic - 100

Most popular artists:
                     name       genre  popularity
3            Neon Reverie  Electronic         100
5            Echo Chamber  Electronic          98
7           Static Mirage        Rock          94
13         Lunar Spectrum        Rock          99
14       Synthwave Saints        Rock          94
21       Celestial Voyage  Electronic          95
22           Quantum Beat     Hip-Hop          96
23      Electric Serpents  Electronic          99
25         Rhythm Alchemy        Jazz          94
26  Cloud Nine Collective         Pop          97
29    Harmonic Dissonance   Classical          96


In [34]:
#lowest and highest artist popularity within each genre
# (the variable name says "gender", but the grouping key is the 'genre' column)
print("Genres maximum and mininum artists'popularity:")
popularity_by_genre = artists.groupby('genre')['popularity']
min_max_pop_by_gender = popularity_by_genre.agg(['min', 'max'])
print(min_max_pop_by_gender)

Genres maximum and mininum artists'popularity:
            min  max
genre               
Classical    20   96
Electronic   58  100
Hip-Hop      47   96
Jazz         35   94
Pop          51   97
Rock         53   99


##### Conflicts

In [72]:
#checking the artists which have the highest count of 1's
# Compare against the matrix WITHOUT the helper column: if this cell is re-run
# while 'count_conflict_one' is still present, the stored counts would
# themselves be compared with 1.0 and inflate the result for any artist whose
# count is exactly 1. On a first run the column does not exist and
# errors='ignore' makes the drop a no-op.
conflict_matrix = conflicts.drop(columns=['count_conflict_one'], errors='ignore')
# The helper column is still stored on `conflicts`; it is removed again by the
# cell that follows.
conflicts['count_conflict_one'] = (conflict_matrix == 1.0).sum(axis=1)
top_3_artists = conflicts['count_conflict_one'].sort_values(ascending=False).head(3)
top_3_artists

Static Mirage         16
The Sonic Drifters    15
Turbo Vortex          13
Name: count_conflict_one, dtype: int64

In [74]:
# Remove the helper column so `conflicts` is a pure artist-by-artist matrix again.
# errors='ignore' keeps this cell re-runnable: without it, a second execution
# raises KeyError because the column has already been dropped.
conflicts.drop(columns=['count_conflict_one'], inplace=True, errors='ignore')

In [76]:
#rank artists by their total conflict (sum of the whole row), not only by the number of 1's
print("Artists with most conflicts:")
conflict_total = conflicts.sum(axis='columns')
most_conflicted = conflict_total.sort_values(ascending=False)
print(most_conflicted.head(5))

Artists with most conflicts:
Static Mirage         23.80
The Sonic Drifters    22.60
Velvet Pulse          20.20
Turbo Vortex          19.90
Lunar Spectrum        18.45
dtype: float64


In [78]:
#checking if the highest conflicts of each artist are usually with artists of the same genre
# For every artist: take their row of the conflict matrix, remove their own
# entry, and keep the labels of the three highest scores.
# Note: several scores tie at 1.0, so which of the tied artists end up in the
# top three depends on the ordering produced by the default sort.
top_conflicts = {
    artist: conflicts.loc[artist].drop(artist).sort_values(ascending=False).head(3).index.tolist()
    for artist in conflicts.index
}

# One record per artist, with the three strongest conflicts spread over columns.
conflict_records = []
for artist, tops in top_conflicts.items():
    conflict_records.append(
        {'artist': artist, 'top_1': tops[0], 'top_2': tops[1], 'top_3': tops[2]}
    )
top_conflicts_df = pd.DataFrame(conflict_records)

# Attach the genre of the artist itself ...
artists_genres = artists[['name', 'genre']]
merged = (
    top_conflicts_df
    .merge(artists_genres, left_on='artist', right_on='name')
    .drop(columns=['name'])
    .rename(columns={'genre': 'artist_genre'})
)

# ... and then the genre of each of the three conflicting artists.
for rank in (1, 2, 3):
    rank_col = f'top_{rank}'
    merged = (
        merged
        .merge(artists_genres, left_on=rank_col, right_on='name', how='left')
        .rename(columns={'genre': f'{rank_col}_genre'})
        .drop(columns=['name'])
    )

print(merged.head(5))

            artist               top_1                  top_2  \
0    Midnight Echo  Velvet Underground       Cosmic Frequency   
1      Solar Flare  Velvet Underground           Echo Chamber   
2     Velvet Pulse  Velvet Underground  Cloud Nine Collective   
3     Neon Reverie  Parallel Dimension           Golden Ember   
4  The Silver Owls      Lunar Spectrum         Mystic Rhythms   

              top_3 artist_genre top_1_genre top_2_genre top_3_genre  
0     Static Mirage         Rock        Rock        Rock        Rock  
1  Celestial Voyage   Electronic        Rock  Electronic  Electronic  
2      Neon Reverie         Jazz        Rock         Pop  Electronic  
3      Velvet Pulse   Electronic  Electronic        Rock        Jazz  
4      Echo Chamber    Classical        Rock   Classical  Electronic  


**Note:** There does not seem to be a clear pattern: some artists conflict mainly with artists of their own genre (e.g. Midnight Echo), while others conflict mainly with artists of different genres (e.g. Velvet Pulse).

In [44]:
#for the most popular artists (popularity above 90), list the artists they conflict with the most
print('\n')
print("For the most popular check what are the artists they have more conflicts with:")
is_popular = artists['popularity'] > 90
popular_conflicts = artists.loc[is_popular, 'name'].tolist()
in_popular_set = merged['artist'].isin(popular_conflicts)
popular_conflict_df = merged.loc[in_popular_set].reset_index(drop=True)
print(popular_conflict_df)
#observation: each artist seems to conflict either with at least one other artist of their own genre
#or mostly with one specific genre (for example, Cloud Nine Collective, a Pop artist, conflicts mainly with Jazz artists)



For the most popular check what are the artists they have more conflicts with:
                   artist               top_1             top_2  \
0            Neon Reverie  Parallel Dimension      Golden Ember   
1            Echo Chamber  Parallel Dimension  Cosmic Frequency   
2           Static Mirage       Midnight Echo    Lunar Spectrum   
3          Lunar Spectrum  Velvet Underground  Synthwave Saints   
4        Synthwave Saints       Midnight Echo    Lunar Spectrum   
5        Celestial Voyage        Velvet Pulse      Neon Reverie   
6            Quantum Beat     The Jazz Nomads    Deep Resonance   
7       Electric Serpents  Parallel Dimension   Hypnotic Echoes   
8          Rhythm Alchemy    Nightfall Sonata      Velvet Pulse   
9   Cloud Nine Collective        Velvet Pulse   The Jazz Nomads   
10    Harmonic Dissonance    Nightfall Sonata    Mystic Rhythms   

                 top_3 artist_genre top_1_genre top_2_genre top_3_genre  
0         Velvet Pulse   Electronic  Ele