# Ranking entities 

In [89]:
import pandas as pd
import warnings

# Read every Classeek CSV export; all warnings are silenced while the files load.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Relation tables linking entities to performances, pieces and skills
    entities_skills = pd.read_csv("../data/classeek_entity_skills.csv")
    entities_pieces = pd.read_csv("../data/classeek_entities_pieces.csv")
    entities_performances = pd.read_csv("../data/classeek_entities_performances.csv")

    # Entity table and the per-entity side tables
    biographies = pd.read_csv("../data/classeek_biographies.csv")
    mediamentions = pd.read_csv("../data/classeek_media_mentions.csv")
    wikipedia_pv = pd.read_csv("../data/classeek_wikipedia_page_views.csv")
    performances = pd.read_csv("../data/classeek_performances.csv")
    alt_names = pd.read_csv("../data/classeek_alternative_names.csv")
    skills = pd.read_csv("../data/classeek_skills.csv")
    entities = pd.read_csv("../data/classeek_entities.csv")

## Ranking entities by number of performances

In [90]:
# How many distinct entity ids appear in the entity/performance relation table
print('Total number of entities :', len(entities_performances['entity_id'].unique()))

# One row per entity_id holding the number of non-null `id` values in its group,
# ordered from the most to the least frequent entity.
df = (
    entities_performances
    .groupby('entity_id')
    .count()
    .sort_values('id', ascending=False)
    .assign(count=lambda counted: counted['id'])
    .loc[:, ['count']]
)
df.head(10)

Total number of entities : 110496


Unnamed: 0_level_0,count
entity_id,Unnamed: 1_level_1
646cd5c9-e37f-4009-9daa-8d055f47c5af,22913
890a2d3f-4eac-4e75-87c8-871239f553b9,15646
acf05239-b57d-436a-ab96-dc4589cd0d17,5322
64e2e1b0-cf8e-433e-9423-d0c2fe30aa52,4389
bd01268d-8a66-4139-88d8-13cffa7702d8,3969
7aa95431-6eee-4efd-9254-8044489aacd6,3756
2ffe418b-f92d-400b-9b2c-8b3f53c4fa8f,2628
10f63486-bb13-47d4-b4da-ddc0d3b3396a,2565
c347bc25-007f-4ea2-9ed1-385bb3eaa53a,2183
eee8bcba-435e-4efb-995d-45686afb36fc,2130


In [91]:
# Quick overview of the distribution of performance counts per entity.
# Every line prints the share (in %, rounded to 2 decimals) of the rows of `df`
# whose 'count' satisfies the condition. The labels spell out the comparison
# really used: `<=` and `>=` are inclusive, so they read "at most" / "at least".
perf_count = df['count']
n_entities = len(df)

distribution = [
    ('Only one performance :', perf_count == 1),
    ('At most two performances :', perf_count <= 2),
    None,  # blank separator line between the "low" and "high" buckets
    ('At least three performances :', perf_count >= 3),
    ('At least five performances :', perf_count >= 5),
    ('At least ten performances :', perf_count >= 10),
    ('At least fifty performances :', perf_count >= 50),
    ('At least a hundred performances :', perf_count >= 100),
]

for entry in distribution:
    if entry is None:
        print('')
        continue
    label, mask = entry
    # mask.sum() counts the True values, i.e. the entities matching the condition
    print(label, round(100 * int(mask.sum()) / n_entities, 2), '%')

Only one performance : 48.46 %
Less than two performances : 70.92 %

More than three performances : 29.08 %
More than five performances : 16.84 %
More than ten performances : 8.24 %
More than fifty performances : 1.39 %
More than a hundred performances : 0.58 %


### Top (Most performances)

In [92]:
# Names of the `limit` first entities of `df` (the performance-count ranking).
limit = 10
top = df.head(limit).index.to_list()

# The header follows `limit` instead of hard-coding the number.
print('Top', limit)
# Walk over the ids actually returned: if the ranking holds fewer than `limit`
# rows, indexing `top` with range(limit) would raise an IndexError.
for i, entity_id in enumerate(top):
    print(i, entities.loc[entities.id == entity_id].name.to_string(index=False))

Top 10
0  Boston Symphony Orchestra
1  New York Philharmonic
2  Henry Wood
3  Serge Koussevitzky
4  Bbc Symphony Orchestra
5  The New Queen'S Hall Orchestra
6  Frederick Kiddle
7  World Premiere
8  Charles Munch
9  New York Premiere


In [93]:
# The positions below are row positions in `df`, picked from the ranking printed above.

# Top three orchestras (positions 0, 1 and 4)
top_orch = df.index[[0, 1, 4]].to_list()

# Top three artists (positions 2, 3 and 6)
top_artists = df.index[[2, 3, 6]].to_list()

### Only three performances

In [94]:
# Examples of entities with exactly three performances. Per the original note,
# this floor of three is meant to avoid picking isolated cases (aliases).
limit = 10
three_perf = df[df['count'] == 3].head(limit).index.to_list()

# Report how many examples were really found rather than a hard-coded 10.
print(len(three_perf), 'examples with 3 performances')
# Walk over the ids actually returned: with fewer than `limit` matches,
# indexing `three_perf` with range(limit) would raise an IndexError.
for i, entity_id in enumerate(three_perf):
    print(i, entities.loc[entities.id == entity_id].name.to_string(index=False))

10 examples with 3 performances
0  Leslie Amper
1  Eunice Kim
2  Ensemble Les Surprises
3  Francesca Braggiotti
4  Milos Bulajic
5  Dominique A
6  Santiago Rodriguez
7  Hank Roberts
8  Davide Lattuada
9  Martine Ritz


In [95]:
# Three artists: ids of the first three rows of `df` whose count is exactly 3
low_artists = df[df['count'] == 3].index[[0, 1, 2]].to_list()

## Basic information for one pager

In [96]:
# Columns of `entities` wanted for the one pager
infos = [
    'id',
    'name',
    'gender',
    'alive',
    'birth_date',
    'death_date',
    'current_influence_percentile',
]

# Peek at the first rows of the full entity table
entities.head(3)

Unnamed: 0.1,Unnamed: 0,id,parameterized_name,name,created_at,updated_at,gender,alive,birth_date,death_date,kind,not_usable,accepted,qa_flags,action_in_progress,current_influence_percentile
0,0,bc8a649f-10fe-4685-9164-859499b6b06a,antonin_dvorak,Antonín Dvořák,2019-05-23 09:55:52.711701,2020-04-30 14:05:38.223370,,False,,,0,False,False,0,,
1,1,e5f113de-59b1-4624-9a43-d60747c57602,leonard_bernstein,Leonard Bernstein,2019-04-10 22:34:21.328525,2020-04-30 14:10:50.742145,,False,,,0,False,False,0,,
2,2,291f205d-2090-49bb-a1ba-454c1cd26141,ludwig_van_beethoven,Ludwig Van Beethoven,2019-04-10 22:56:12.313574,2020-04-30 14:10:52.466085,,False,,,0,True,True,0,,


#### Get info for a list of ids

In [97]:
def get_info_for_list(id_list, entities_df=None):
    """Return the basic one-pager columns for the entities whose id is in `id_list`.

    Parameters
    ----------
    id_list : list-like
        Entity ids to look up.
    entities_df : pandas.DataFrame, optional
        Table to search. It must contain the columns listed in `entity_infos`.
        When omitted, the notebook-level `entities` frame is used.

    Returns
    -------
    pandas.DataFrame
        The matching rows restricted to the info columns. Rows keep the order
        (and index labels) of the searched table, NOT the order of `id_list`;
        ids that are not found are simply absent from the result.
    """
    entity_infos = ['id', 'name', 'gender', 'alive', 'birth_date', 'death_date', 'current_influence_percentile']

    source = entities if entities_df is None else entities_df

    # Vectorized membership test instead of a Python-level `x in id_list` per row.
    return source.loc[source['id'].isin(id_list), entity_infos]

### Top Orchestras

In [98]:
# Basic info for the ids in `top_orch`. The lookup filters `entities` with a mask,
# so rows come back in `entities` order, not in ranking order.
info_top_orch = get_info_for_list(top_orch)
info_top_orch

Unnamed: 0,id,name,gender,alive,birth_date,death_date,current_influence_percentile
75839,646cd5c9-e37f-4009-9daa-8d055f47c5af,Boston Symphony Orchestra,,True,,,99.0
76833,bd01268d-8a66-4139-88d8-13cffa7702d8,Bbc Symphony Orchestra,,True,,,99.0
146737,890a2d3f-4eac-4e75-87c8-871239f553b9,New York Philharmonic,,True,,,


### Top Artists 

In [99]:
# Basic info for the ids in `top_artists`. The lookup filters `entities` with a mask,
# so rows come back in `entities` order, not in ranking order.
info_top_artists = get_info_for_list(top_artists)
info_top_artists

Unnamed: 0,id,name,gender,alive,birth_date,death_date,current_influence_percentile
15453,2ffe418b-f92d-400b-9b2c-8b3f53c4fa8f,Frederick Kiddle,,True,,,
76419,64e2e1b0-cf8e-433e-9423-d0c2fe30aa52,Serge Koussevitzky,,False,,,
81967,acf05239-b57d-436a-ab96-dc4589cd0d17,Henry Wood,,True,,,78.0


### Entities with fewer performances

In [100]:
# Basic info for the ids in `low_artists`. The lookup filters `entities` with a mask,
# so rows come back in `entities` order, not in the order of `low_artists`.
info_low_artists = get_info_for_list(low_artists)
info_low_artists

Unnamed: 0,id,name,gender,alive,birth_date,death_date,current_influence_percentile
75484,986c8d27-9fc3-4f3c-9314-8401e4bec003,Ensemble Les Surprises,,True,,,53.0
81704,ddd9bb4b-8965-4046-8b20-9b2f273fe0d9,Eunice Kim,,True,,,38.0
88062,b6f086e1-56d8-4bd3-b6ff-1f01cf9c533c,Leslie Amper,,True,,,


### Gautier Capuçon / Renaud Capuçon
- Gautier Capuçon / id = 149b70fa-a190-4a83-bea9-38dafc1d4c5c 
- Renaud Capuçon / id = d414cdfd-b2c4-4d9f-adce-7c1a6d03d162

In [101]:
# Every row of `entities` whose parameterized_name contains 'capucon'
capucon_mask = entities['parameterized_name'].str.contains('capucon')
entities.loc[capucon_mask]

Unnamed: 0.1,Unnamed: 0,id,parameterized_name,name,created_at,updated_at,gender,alive,birth_date,death_date,kind,not_usable,accepted,qa_flags,action_in_progress,current_influence_percentile
758,758,149b70fa-a190-4a83-bea9-38dafc1d4c5c,gautier_capucon,Gautier Capuçon,2019-04-10 22:00:12.231805,2020-06-26 09:30:20.168532,,True,,,0,False,False,0,,99.0
2542,2542,4a3cc23c-93c9-4a25-a0fa-e1a6be452cea,renaud_capucon_lausanne_soloists,Renaud Capuçon - Lausanne Soloists,2020-07-15 08:03:00.308463,2020-07-26 08:05:15.398801,,True,,,0,True,True,0,,38.0
2760,2760,de876f79-75c7-4344-887f-fb5a633ad7b0,quatuor_renaud_capucon,Quatuor Renaud Capuçon,2019-04-10 23:25:28.865209,2019-04-10 23:25:28.865209,,True,,,0,False,False,0,,
91354,91354,df73982b-5aa7-4cfc-9374-40f016652d94,renaud_capucongautier_capuconfrank_braley,Renaud Capucongautier Capuconfrank Braley,2019-04-10 22:26:17.504720,2020-04-07 15:30:49.093605,,True,,,0,True,False,1,,
91709,91709,ea4da749-eab4-4359-9c26-bad2c90403d9,renaud_capucongautier_capuconnicholas_angelich,Renaud Capuçongautier Capuçonnicholas Angelich,2019-04-10 22:56:23.356138,2020-04-07 15:31:09.168588,,True,,,0,True,False,1,,
91936,91936,7d6898fc-6690-454f-92ae-baef14f1a4f8,staatskapelle_de_dresdedaniel_hardingrenaud_ca...,Staatskapelle De Dresdedaniel Hardingrenaud Ca...,2019-04-10 23:19:19.813299,2020-04-07 15:27:46.825796,,True,,,0,True,False,1,,
91940,91940,8f630e74-5c0e-4325-a708-1caf68409a1f,les_grands_solistesgautier_capucongabriela_mon...,Les Grands Solistesgautier Capuçongabriela Mon...,2019-04-10 23:19:19.702935,2020-04-07 15:28:20.046161,,True,,,0,True,False,1,,
91952,91952,845498f4-98ab-4def-b1a7-d716c493e6dc,renaud_capuconnicholas_angelichles_grands_soli...,Renaud Capuconnicholas Angelichles Grands Soli...,2019-04-10 23:25:42.967559,2020-04-07 15:27:59.407880,,True,,,0,True,False,1,,
145377,145377,d414cdfd-b2c4-4d9f-adce-7c1a6d03d162,renaud_capucon,Renaud Capuçon,2019-04-05 22:00:26.753946,2020-06-26 09:40:24.063956,,True,,,0,False,False,0,,99.0
154805,154805,c0aa2525-6986-43ff-b8df-30620b30b670,gautier_capucon_capucon,Gautier Capucon (Capuçon),2020-04-10 01:30:54.770641,2020-04-10 01:30:54.770641,,True,,,0,True,False,0,,


### IDs 

- Gautier Capuçon : 149b70fa-a190-4a83-bea9-38dafc1d4c5c
- Renaud Capuçon : d414cdfd-b2c4-4d9f-adce-7c1a6d03d162