In [1]:
import json
import re
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm.auto import tqdm

# Query Wikipedia

In [2]:
def query_wikipedia(page_name):
    """Return the raw wikitext of an English Wikipedia page.

    Sends an ``action=query`` request to the MediaWiki API asking for the
    revision content of ``page_name`` and returns the text stored in the
    ``main`` slot of the first revision of the first page in the response.

    Parameters
    ----------
    page_name : str
        Title of the page to look up.

    Returns
    -------
    str
        Content of ``['revisions'][0]['slots']['main']['*']`` for the page.

    Raises
    ------
    KeyError, IndexError
        If the JSON response does not contain the expected keys
        (presumably the case when the page does not exist -- callers
        rely on an exception being raised in that situation).
    urllib.error.URLError
        If the HTTP request itself fails.
    """
    baseurl = "https://en.wikipedia.org/w/api.php?"
    action = "action=query"
    # quote_plus() already encodes spaces as '+', so no extra replacement
    # of ' ' is needed afterwards.
    title = "titles=" + urllib.parse.quote_plus(page_name)
    content = "prop=revisions&rvprop=content&rvslots=*"
    dataformat = "format=json"

    query = "{}{}&{}&{}&{}".format(baseurl, action, content, title, dataformat)

    # Close the HTTP response deterministically; this function is called
    # tens of thousands of times in a row by the crawl loop.
    with urllib.request.urlopen(query) as wikiresponse:
        wikitext = wikiresponse.read().decode('utf-8')
    wikijson = json.loads(wikitext)

    pages = wikijson['query']['pages']
    pageid = list(pages.keys())[0]
    description = pages[pageid]['revisions'][0]['slots']['main']['*']

    return description

# [All Artists on Genius](https://genius.com/artists)

In [3]:
# Generate the baseline list of musical artists to compare to
# (one artist name per line in genius_artists.txt).
genius_artists = []
with open('genius_artists.txt', encoding='utf-8') as f:
    for line in f:
        # Remove zero-width space characters.
        line = line.replace(u'\u200b', '')
        # Remove parenthesised qualifiers "(...)". The trailing rstrip() also
        # drops the end-of-line character; slicing with [:-1] instead would
        # cut the last letter of the final name when the file does not end
        # with a newline.
        line = re.sub(r'\([^)]*\)', '', line).rstrip()
        genius_artists.append(line)

In [4]:
# Number of artist names read from genius_artists.txt
len(genius_artists)

18770

### Enrich list with ... (singer) and ... (band)

In [4]:
# Candidate page titles: every plain name first, then every name with
# ' (singer)' appended, then every name with ' (band)' appended.
genius_artists_enriched = [
    name + suffix
    for suffix in ('', ' (singer)', ' (band)')
    for name in genius_artists
]

### Keep only the names for which a Wikipedia page exists

#### Kept as Markdown so that the cell is not re-run (it takes about 5 hours)
```python
genius_list = []      # page titles that resolved on Wikipedia
fail_list = []        # candidate names for which the lookup raised
dic_redirect = {}     # redirect target title -> candidate name that led to it
for artist in tqdm(genius_artists_enriched):
    try:
        description = query_wikipedia(artist)
        # If the page is a redirect, keep the title it points to
        # (section anchors "#..." and labels "|..." are not captured).
        page_redirect = re.findall(r'#[Rr][Ee][Dd][Ii][Rr][Ee][Cc][Tt]\s*\[\[(.*?)(?:#.*?)?(?:\|.*?)?\]\]', description)
        if page_redirect:
            artist_redirect = page_redirect[0]
            dic_redirect[artist_redirect] = artist
            genius_list.append(artist_redirect)
        else:
            genius_list.append(artist)
    except Exception:
        # Best effort: any failed lookup is recorded and the crawl goes on.
        # Catching Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit through, so this long-running loop can still be stopped.
        fail_list.append(artist)

# Remove duplicates (dict keys keep first-seen order) and save
genius_list_dic = dict.fromkeys(genius_list)
with open('genius_list_enriched.json', 'w') as f:
    json.dump(genius_list_dic, f)

with open('redirect_enriched.json', 'w') as f:
    json.dump(dic_redirect, f)
```

## Cleaning

In [5]:
# Load the saved dictionary of Wikipedia page titles from disk.
with open('genius_list_enriched.json') as cache_file:
    genius_list_dic = json.loads(cache_file.read())

In [6]:
# Number of page titles before cleaning
len(genius_list_dic)

12022

In [7]:
# Drop every title containing 'disambiguation' (case-insensitive).
# The matches are collected first because a dict cannot be resized
# while it is being iterated.
list_disambiguation = [
    title for title in genius_list_dic
    if 'disambiguation' in title.lower()
]

for title in list_disambiguation:
    genius_list_dic.pop(title, None)

In [8]:
# Drop every title containing 'list of' (case-insensitive).
# The matches are collected first because a dict cannot be resized
# while it is being iterated.
list_list_of = [
    title for title in genius_list_dic
    if 'list of' in title.lower()
]

for title in list_list_of:
    genius_list_dic.pop(title, None)

In [9]:
# Drop every title containing 'discography' (case-insensitive).
# The matches are collected first because a dict cannot be resized
# while it is being iterated.
list_discography = [
    title for title in genius_list_dic
    if 'discography' in title.lower()
]

for title in list_discography:
    genius_list_dic.pop(title, None)

In [10]:
# Titles of three characters or fewer are dropped unless they are
# explicitly listed in keep_3_letters.
keep_3_letters = ['ATB', 'BTS', 'B2K', 'BoA', 'Bôa', 'B2K', 'Ceg', 'Cyn', 'D12', 'D4L', 'DMX', 'Exo', 'EZI',
                 'GZA', 'iLe', 'JID', 'JLS', 'KMD', 'KSI', 'MØ', 'Mýa', 'Nas', 'Nek', 'Psy', 'RBD', 'RZA',
                 'SZA', 'SWV', 'S-X', 'Tix', 'U2', 'UGK', 'Unk', 'Us3', 'XTC', 'Y&T', 'Yas', '2PM', '3LW',
                 '3T', 'IMx']
less_3_letters = [title for title in genius_list_dic if len(title) <= 3]

for short_title in less_3_letters:
    if short_title in keep_3_letters:
        continue
    genius_list_dic.pop(short_title, None)

In [11]:
# Wikipedia page titles to remove from genius_list_dic (presumably pages that
# are not about the intended musical artist -- TODO confirm selection criteria).
# Some titles appear more than once (e.g. 'Josiah', 'Zambezi'); that is harmless
# because the removal loop that follows ignores titles that are not present.
list_deleted = ['Rolling Stone', 'United States', 'Spotify', 'Grammy Awards', 'United Kingdom',
                'Saturday Night Live', 'Interscope Records', 'Chicago', 'Japan', 'Philadelphia',
                'Vice (magazine)', 'Paris', 'Boston', 'Europe', 'Texas', 'India', 'Berlin', 'The Simpsons',
                'Soviet Union', 'Sesame Street', 'Milan', 'Rome', 'Nickelodeon', 'Cartoon Network',
                'Empire Distribution', 'Harlem', 'Genius (website)', 'Phonograph record', 'Survivor',
                'Glee (TV series)', 'Bible', 'United Nations', 'South Park', 'Asia', 'Family Guy',
                'Martin Luther King Jr.', 'Homer', 'Saint Petersburg', 'Universal Pictures', 'Animaniacs',
                'Cash Money Records', 'Alabama', 'Edgar Allan Poe', 'Charles Dickens', 'Walt Disney Records',
                'Law & Order: Special Victims Unit', 'United States Congress', 'Internet', 'Indiana',
                'Monty Python', 'Nintendo', 'Game of Thrones', 'J. R. R. Tolkien', 'Nepal', 'Oscar Wilde',
                'League of Legends', 'Beirut', 'Harry Potter', 'Jimmy Fallon', 'Liverpool F.C.',
                'New West Records', 'Compilation album', 'Cinderella', 'How I Met Your Mother', 'Freestyle',
                'Odyssey', 'Friends', 'Ubisoft', 'Zeus', 'Friedrich Nietzsche', 'SpongeBob SquarePants',
                'George Orwell', 'James Corden', 'Kansas', 'Pokémon', 'Star Trek', 'Young Money Entertainment',
                'Twin Peaks', 'Virginia Woolf', 'Voltaire', 'William Wordsworth', 'BoJack Horseman',
                'Adventure Time', 'Grand Hustle Records', 'Top Dawg Entertainment', 'UEFA Champions League',
                '2014 FIFA World Cup', 'Quality Control Music', 'Emily Dickinson', 'Valencia', 'Barbie',
                'United Service Organizations', 'Fiji', 'J. K. Rowling', 'One Piece', 'William Shakespeare',
                'Eden Project', 'Gangnam Style', 'Chicago hip hop', "Rock the Ocean's Tortuga Music Festival",
                'United States Department of Agriculture', 'Federal government of the United States',
                'Band Aid', 'We Are the World', "Wild 'n Out", 'Train', 'WandaVision', 'Gyalis', 'Cannon',
                'Witchcraft', 'Geoffrey Chaucer', 'Elizabeth I', 'Toyotomi Hideyoshi', 'Edmund Spenser',
                'Thomas Morley', 'John Donne', 'John Webster', 'Pierre Corneille', 'Pierre Corneille ',
                'Giles Corey', 'Giacomo Casanova', 'Toussaint Louverture', 'Johann Wolfgang von Goethe',
                'William Blake', 'Friedrich Schiller', 'Robert Burns', 'Jane Austen', 'Washington Irving',
                'Lord Byron', 'Percy Bysshe Shelley', 'John Keats', 'Mary Shelley', 'Adam Mickiewicz',
                'Victor Hugo', 'Ralph Waldo Emerson', 'Nathaniel Hawthorne', 'Cochise', 'Elizabeth Barrett Browning',
                'Henry Wadsworth Longfellow', 'Abraham Lincoln', 'Alfred, Lord Tennyson', 'Robert Browning',
                'Charlotte Brontë', 'Frederick Douglass', 'Henry David Thoreau', 'Emily Brontë', 'Herman Melville',
                'Walt Whitman', 'George Frideric Handel', 'Henry Purcell', 'Slavery', 'Orson Welles', 'Enslaved',
                'Brit milah', 'Books of Kings', 'Josiah', 'Phinehas', 'Solomon', 'Josiah', 'Isaiah', 'Omri',
                'Queen of Sheba', 'Joab', 'Immanuel', 'Ezekiel', 'Jacob', 'Book of Joshua', 'Shrimp', 'Indian Ocean',
                'Fish', 'Megabat', 'Tropical cyclone', 'Ocean', 'Kolkata', 'Zambezi', 'Island', 'El Niño',
                'Toni Morrison', 'Adrienne Rich', 'Akua Naru', 'Amiri Baraka', 'Chloe', 'Haruki Murakami',
                'James Baldwin', 'William Faulkner', 'Margaret Atwood', 'Maya Angelou', 'Terror Squad',
                'controlled-access highway', 'Orléans', 'Nile', 'Kashmir', 'Traffic', 'Attila', 'Hale',
                'Akhenaten', 'Water', 'Akhmim', "New Year's Day", 'Orders of magnitude (length)', 'Plan B', 
                'Winston Churchill', 'Valley', 'Zambezi', 'Types of volcanic eruptions', 'Himalayas',
                'Rings of Saturn', 'Chrysopogon zizanioides', 'Ali Zafar', 'Daler Mehndi', 'Cricket',
                'Kashmir', 'Medina', 'Nickelback', 'Nusrat Fateh Ali Khan', 'Imran Khan', '1982', '1968',
                'Zone', 'Worlds Apart', 'Wende', 'Vault', '24-hour clock', '24 Hours', 'Zoro', 'Zach Williams',
                'Enrique González', 'Peach-Pit', 'Rozen Maiden', 'Zwei', 'Hush', 'The Gazette', 'Loonie',
                'Karuka', 'Enemy', 'Rivalry', 'Villain', 'Off with Their Heads', 'Defiance, Ohio', 'Ultimo',
                'Cheek', 'face', 'cotton swab', 'Visage', 'Russian hip hop', 'Chambered nautilus', 'Lumen',
                'Cherrie', 'Darin', 'Eivor', 'Vamp', 'Ulrike', 'Erin', 'Frederick', 'Irina', 'Typhoon', 
                'Opus', 'Orchidaceae', 'Hymenopus coronatus', 'Saba', 'Omnia', 'Hedwig and the Angry Inch (musical)',
                'Pile', 'The Spirit of the Beehive', '7_Dwarves_–_Men_Alone_in_the_Wood', 'Santiano', 'Jack',
                'Genesis', 'Digimon', 'keshi', 'Ultra Q', 'Corona', 'Architect', 'Tulus', 'Anonymity', 'Anonymous', 'Anonymous work',
                'Fuego', 'Howling', 'Russian jokes', 'Fantômas', 'AK-47', 'Gulag', 'Fuck', 'Franco',
                'Welcome to the Jungle (Franco "El Gorila" album)', '1995', "Let's Go Brandon", 'Pepe',
                'Warner Music Latina', 'Lute', 'Molotov', 'Mora', 'Morat', 'Ramón', 'Un Corazón', 'Mallu Magalhães (2008 album)',
                'Vincent', 'Jordy', 'JORDY', 'Fanny', 'Tsoi', 'Sturm, Ruger & Co.', 'Girl', 'Woman', 
                'Daughter', 'Family', 'Father', 'Immunity (medical)', 'Idaho', 'Reindeer', 'Cursive',
                'Ernest Hemingway', 'Grizzly bear', 'Dora the Explorer', 'Hunting', 'Gilbert and Sullivan',
                'Snow', 'Vikings', 'Zorro', 'Fisher-Price', 'LazyTown', 'The Backyardigans', 'Winx Club',
                'Dime (United States coin)', 'Finch', 'Bonobo', 'Taiga', 'Shiva', 'Goose', 'Wild river',
                'Avatar', 'Blue', 'Chakra', 'Gloria E. Anzaldúa', 'Kali', 'Parvati', 'Yogi', 'Guru',
                'Nirvana', 'Nothing', 'yama', 'Question (character)', 'WALL-E', 'Ceremony', 'Veronica Roth',
                'Inquisition', 'idealism', 'Otto', 'Zippo', 'Nada', 'Vacuum', 'Void', 'Wavelength', 'Navy blue',
                'Inanna', 'Black', 'Nero', 'American football', 'Riverdale (2017 TV series)', 'TLC (group)',
                'WAZE-TV', 'Tottenham Hotspur F.C.', 'Brazil national football team', 'West Ham United F.C.',
                'Zinedine Zidane', 'Euro', 'Arsenal F.C.', 'Roberto Carlos', 'Zé Roberto', 'Guti', 'Xavi',
                'INDECT', '21st century', 'Professor', '2007', 'Fatah', 'Orphan', 'Victory', 'Athena',
                'Ancient Mesopotamian underworld', 'Marduk', 'Priest', 'Underworld', 'Xibalba', 'Isis',
                'Bastille', 'Deicide', 'Egyptian hieroglyphs', 'Horus', 'Ares', 'Petra', 'Augustus',
                'Ovid', 'Hector','Gaia', 'Muses', 'Emperor', 'Scylla', 'Horace', 'Tsar', 'Palace', 'Patronage',
                'Propaganda', 'Sivas', 'Cold', 'Low-life', 'Augustine of Hippo', 'Cage', 'Palace of Versailles',
                'Leto', 'Dido', 'Divine Comedy', 'Fetus', 'Icarus', 'Styx', 'Titus Andronicus', 'Rodriguez',
                'Justice', 'United States Department of Justice', 'Paris, Texas', 'Interpol', 'Illegal drug trade',
                'The Bahamas', 'Theocracy', 'Futurama', 'The Walking Dead (video game)', 'Valve Corporation',
                'Doki Doki Literature Club!', 'Undertale', 'Yandere Simulator', 'Random Encounters', 'Yuri', 
                "Five Nights at Freddy's", 'Rick and Morty', 'TWRP', 'The Yogscast', 'Gastrointestinal tract',
                'Heart', 'In vivo', 'Ivory', 'Fluoxetine', 'Raccoon', 'Organ (biology)', 'Weaning', 'Vida', 
                'Robert Bosch GmbH', 'Characters of Final Fantasy XV', 'Video', 'The Calling', 'Zlatan', 
                'The Devil Makes Three', 'Cheating in video games', 'Drew Gehling', 'Introduction', 'Malcolm X', 
                'Grand Theft Auto: San Andreas', 'Rock Star', 'Rockstar Games', 'Rusty Cage', 'Unbreakable Kimmy Schmidt',
                'King', 'The Nightmare Before Christmas', 'Báthory family', 'Herzog', 'Rígsþula', "Catherine O'Hara",
                'Alan_Wake', 'Naruto', 'Naruto Uzumaki', 'Limbo', 'Pixie', 'Seamus Heaney', 'Hidenari Ugaki',
                'E-Girls Are Ruining My Life!', 'Joost', 'Atmosphere of Earth', 'Atmosphere', 'Xenon',
                'Extraterrestrial life', 'American Dad!', 'Elohim', 'The Flintstones', 'Puppet', "Bob's Burgers",
                'Ollie', 'Teddy', '20th Century Studios', 'Sony Pictures', "It's Always Sunny in Philadelphia",
                'Hughes Entertainment', 'The Partridge Family', 'Chevrolet', 'Johnny_Cash', 'Vega', 'Luciano',
                'Star', 'Polaris', 'X-ray', 'W. H. Auden', 'Hoshi', 'Gamma ray', 'Light', 'Microwave',
                'Television', 'phosphorescence', 'Universe', 'Timeline of the far future', 'Future',
                'Gabriel', 'Behemoth', 'Burial', 'Noah', 'Paradise Lost', 'Nazareth', 'T. S. Eliot',
                'Symposium (Plato)', 'Hava', 'Númenor', 'funeral', 'Cemetery', 'cemetery', 'Death in June',
                'Crypt', 'Death', 'Floristry', 'Banquet', 'Obituary', 'Ghoul', 'Hammock', 'Upper class',
                'Visigoths', 'Dijon', 'Burgos', 'Gregorian chant', 'Gdańsk', 'Evan', 'Vanna', 'Love',
                'Gdańsk', 'Mam talent! (series 4)', 'Zbigniew Brzezinski', 'Armand Hammer', 'Loving', 
                'Walls', 'Tender', 'Quincy', 'Picture This', "O'Shea", 'Shay', 'Night Club', 'Hannibal Buress',
                'Loose Ends', 'Insanity', 'Madness', 'Vaas Montenegro', 'Günther', 'Flight', 'Wing', 'Thou',
                'Elias', 'Helvetia', 'Duster', 'Delirious', 'David Foster Wallace', 'Ozymandias', 'Sport', 'Tennis',
                'Zadie Smith', 'Parks and Recreation', 'The Muppets', 'The Jim Henson Company', 'Madrigal',
                'Cult', 'Quest', 'Tristan', 'Jim Jones', 'Celeste', 'Baron', 'New Model Army', 'Parliament',
                'Azteca', 'Coco', 'Arash', 'Broadcasting', 'Noise', 'Bullying', 'Gossip', 'Hatred', 'X-Men',
                'The X Factor Philippines (season 1)', 'Alonzo', 'Loud', 'Pomme', 'Rema', 'Éric Zemmour',
                'Simi', 'On Fleek (Eva song)', 'Élodie', 'Gianni', 'Lucero', 'Madame', 'Critical', 'Copypasta',
                '4chan', 'Editing', 'Hatsune Miku', 'Sega', 'Nyan Cat', 'Hololive Production', 'Caramelldansen',
                'Mario', 'Ghost', 'George R. R. Martin', 'Wali', 'Ghost ship', 'Order of Assassin', 'Undead',
                'Monster High', 'Abraham in Islam', 'Umar', 'Wudu', 'Fard', 'Quakers', 'The Goonies', 'The Office',
                'Arrested Development', 'Olivia', 'University of Nevada, Las Vegas', 'CECA', 'citizenship',
                'Tradition', 'Coby', 'Duman', 'Falco', 'Lunatic', 'Nazar', 'Pamela', 'Rasta', 'Severina', 'Tarkan',
                'Papon', 'Pentagram', 'Pentangle', 'Religious music', 'Shaan', 'Zubin', 'Vice', 'Colony House', 
                'Talaash: The Answer Lies Within', 'Fuzon', 'Bethel Music', 'Bushido', 'Virtue', 'Tool', 'health',
                'Vital signs', 'Pain', 'Self-esteem', 'Fleurie', 'Lynda', 'Plumb', 'Ruelle', 'Rockwell',
                'Kano', 'Intelligence quotient', 'Ted Kaczynski', 'Hoodie', 'Will Ferrell', 'Sylvia Plath', 
                'eNCA', 'Lale', 'Vasto', 'Basement', 'Bedroom', 'House', 'Matilda the Musical', 'Zelle', 
                'Flex', 'Khaled', 'Naza', 'Garou', 'Soprano', 'Angèle', 'Wallen', 'Vianney', 'Göksel',
                'Gülşen', 'Teoman', 'Anitta', 'Anthony Hamilton', 'Ashanti', 'Ashe', 'Avant', 'Avatar: The Last Airbender',
                'Avatar: The Last Airbender (season 3)', 'Bazzi', 'bülow', 'Buju', 'Cameo', 'Carl Thomas', 'Case',
                'Cassidy', 'Cassie', 'The Cheetah Girls', 'Cheque', 'Cherish', 'Chip', 'Christian French',
                'Christopher', 'Common', 'Drake', 'Danny Ocean', 'Daya', 'Deno', 'Deuce', 'Disclosure',
                'Donnie', 'Elaine', 'Estelle', 'Example', 'Fletcher', 'Fergie', 'Foxx', 'Frances', 'Fredo',
                'Gallant', 'The Garden', 'Grace Carter', 'Grace Davies', 'Griff', 'Halsey', 'Hannah',
                'In Real Life', 'Joji', 'Jake Miller', 'Jamie Miller', 'John Newman', 'Kent Jones',
                'Kida', 'Larry', 'Lemonade Mouth (soundtrack)', 'Lemonade Mouth ', 'Lloyd', 'Loreen',
                'Mabel', 'Mahalia', 'Mavado', 'Maxwell', 'Monica', 'Murda', 'Murs', 'Nneka', 'Olly', 
                'The Pack', 'Petey', 'Heatwave (Robin Schulz song)', 'Ruel', 'Sade', 'The Score', 'Shaggy',
                'Sigrid', 'Sleigh Bells', 'Snoop', 'Solange', 'Standing on the Corner', 'Goosebumps (Travis Scott song)',
                'Teni', 'Tora', 'Usher', 'Vargas', 'WZRD', 'Youngblood Hawke', 'Yuna', 'Zayn', 'Zapp', '2gether',
                'Golden Child', 'Infinite', 'Solar', 'Tiffany', 'Tiffany (given name)', 'Adrian', 'Alphaville', 
                'Aquilo', 'Babe Rainbow', 'Balthazar', 'Bath', 'Bauhaus', 'Belly', 'Birdy', 'Birthday Party',
                'Black Angels', 'Blur', 'Bosh', 'Bright Eyes', 'Buddy', 'Church', 'Coil', 'Dave', 'Damien', 
                'Dead or Alive', 'DeMarco', 'Duffy', 'Electric Youth', 'Elysian Fields', 'Enigma', 'Feeder', 
                'Feist', 'Fink', 'Foster', 'Frou Frou', 'Fuzz', 'Gang of Four', 'Gazpacho', 'Gregorian', 'Grimes',
                'Hannah Cohen', 'Hinds', 'Holy Holy', 'Horsey', 'Ian McCulloch', 'Icehouse', 'Idlewild', 'Indochine',
                'James', 'José González', 'Keane', 'Lincoln', 'Lush', 'Lustra', 'Madrugada', 'Metric',
                'The National', 'New Order', 'Nico', 'Numb', 'Orbit', 'Orbital', 'Paper Chase', 'Paradis', 
                'Patrick Watson', 'Pavement', 'Phantogram', 'Phoenix', 'Plumtree', 'Porches', 'Porter', 'Portishead',
                'Presence', 'Pretenders', 'Pulp', 'Replacement', 'Ride', 'Royal Blood', 'Rufus', 'Scott Walker',
                'Sparks', 'Squeeze', 'Talco', 'Tamino', 'The Fall', 'Travis', 'Unspoken', 'Vacationer', 'Vector',
                'The View', 'Villager', 'The Vines', 'Wang Chung', 'White Lies', 'Whitney', 'William Carlos Williams',
                'Wolfsheim', 'Woods', 'Wye Oak', 'Yazoo', 'Yseult', 'Yuck', 'Zola', 'Zammuto', 'Zombie Nation',
                'Aqua', 'Dardan', 'Fantasia', 'Foxy Brown', 'Gabrielle', 'Gala', 'Gavin James', 'Georgio',
                'Gigi', 'Hanson', 'H-Town', 'Hue and Cry', 'Incognito', 'Intruders', 'Irma', 'James Morrison', 'Jewel',
                'John Park', 'Kevin', 'Lawrence', 'Melanie', 'Michael Schulte', 'Next', 'Nightcrawler', 'Nivea',
                'Oceana', 'Oliver', 'Pietro Lombardi', 'Raisa', 'Sarah Connor', 'Scooter', 'Seal', 'Smokie',
                'Step', 'Uh Huh Her', 'Vesta', 'Wiktoria', 'Accept', 'Alice Cooper', 'All That Remains', 'Almighty',
                'Alpha Wolf', 'Annihilator', 'As I Lay Dying', 'James Joyce', 'Babes in Toyland', 'Battle Beast',
                'Beartooth', 'Black Flag', 'Blood on the Dance Floor', 'Bow Wow', 'Bush', 'Carcass', 'Carnage',
                'Carnifex', 'Chevelle', 'Code Orange', 'Converge', 'Counterpart', 'Crumb', 'Crystal Lake', 
                'Damned', 'Daughtry', 'The Devil Wears Prada', 'Discharge', 'Disturbed', 'Dope', 'Down', 'Eloy',
                'Elvenking', 'Emanuel', 'Emery', 'Entombed', 'Epica', 'Equilibrium', 'Extreme', 'Fade', 'Falconer',
                'The Fall of Troy', "Fiddler's Green", 'Filter', 'Flaw', 'Flyleaf', 'Focus', 'Foreigner',
                'Full of Hell', 'The Gathering', 'Germ', 'The Ghost Inside', 'Gojira', 'Grady', 'Guess Who', 'Haken',
                "Harm's Way", 'Hella', 'Helmet', 'Hope', 'El Dorado', 'Hundredth', 'Hurt', 'Ian Hunter', 
                'I Declare War', 'Immolation', 'Immortal', 'In Mourning', 'Interval', 'Iron Mask', 'Issue',
                'Jakob', 'Jethro Tull', 'King Diamond', 'Klaatu', 'Lindemann', 'Listener', 'Mad Season', 'Mayhem',
                'Ministry', 'Misfits', 'Mr. Big', 'Natalie Taylor', 'Nena', 'Nitro', 'Obscura', 'Ocean Grove',
                'Onslaught', 'Otherwise', 'Overkill', 'Panther', 'Periphery', 'Possessed', 'Power Trip', 'Primus',
                'Puya', 'Queen', 'Rage', 'Riverside', 'Running Wild', 'Rush', 'Scorpion', 'Zodiac', 'Orbital inclination',
                'Silverstein', 'Slash', 'Sodom', 'Steppenwolf', 'Tesla', 'Testament', 'Theory', 'Therion', 'Thornhill',
                'Tremonti', 'Trivium', 'Trouble', 'Unbroken', 'Union', 'Unleashed', 'Uriah Heep', 'Vader', 'Vektor',
                'Warning', 'Whirr', 'Whitehouse', 'White Zombie', 'Will', 'Winger', 'Yvette', 'Zimmer', '38 Special',
                'Blondie', 'Corey Hart', 'Cracker', 'David Gray', 'Dawes', 'Dispatch', 'Dylan', 'Empire of the Sun',
                'Engelbert Humperdinck', 'Faust', 'George Baker', 'Hozier', 'The Heavy', 'Japa', 'Jeanette',
                'Kensington', 'kent', 'Lamp', 'Louane', 'Modest Mouse', 'Paul Kelly', 'Player', 'Quimby', 'Randy',
                'Redbone', 'Richard Thompson', 'Robert Palmer', 'Roy Harper', 'Santana', 'Sticky Fingers', 
                'Sting', 'Suede', 'Tom Jones', 'Toto', 'Wednesday', 'Thursday', 'William Bell', 'Black Moon',
                'Khalid', 'Little Brother', 'Miguel', 'Nasty', 'Beyond', 'Four Seasons', 'Looking Glass',
                'Minnelli', 'Nancy Wilson', 'Nathan Evans', 'Raveena', 'Swan', 'The Ugly Duckling', 'Stockard Channing',
                'Thomas Sanders', 'Tiny Tim', 'Token', 'Vikingarna', 'atlas', 'Brett Young', 'Chris Young',
                'Emilio', 'Frog', 'Gary Stewart', 'Home Free', "I'm with Her", 'Jessi', 'John Williamson',
                'Jordan Davis', 'Leeland', 'Little Big', 'Midland', 'Morgan Wade', 'Old Dominion', 'Riley Green', 
                'Trey Lewis', 'Wanda', 'When in Rome', 'Waylon', 'Yola', 'Junoon', 'Mustafa', 'Aventura', 'Calle 13',
                'Dream', 'Sleep', 'Incubus', 'Farina', 'Haggard', 'Pedro Fernández', 'Sech', 'Selena (given name)',
                'Kalash', 'Against the Current', 'As It Is', 'Bayside', 'Belmont', 'Brand New', 'Casey', 'Charmer',
                'Creeper', 'The Dangerous Summer', 'Famous Last Words', 'Fresh', 'Goldfinger', 'Good Riddance',
                'Grayscale', 'The Green', 'Hedley', 'Ignite', 'I Hate Myself', 'Jank', 'Jawbreaker', 'Julia Brown',
                'La Dispute', 'Lifehouse', 'Lost', 'Ludo', 'Marietta', 'Movement', 'Murder by Death', 'New Politics',
                'The Night Café', 'Nightly', 'Operation Ivy', 'Orange', 'Owen', 'Ozma', 'Pennywise', 'Pepper', 
                'Rancid', 'Real Friends', 'Rebelution', 'Say Anything', 'Set It Off', 'The Story So Far', 'Sublime',
                'Tessa', 'Transit', 'Trevor Hall', 'Turnover', 'Verse', 'Weathers', 'WSTR', 'Young Guns', 
                'Youth Brigade', '7 Seconds', 'Hyde', 'Strange Case of Dr Jekyll and Mr Hyde', 'Jala', 'Mark Forster',
                'Nico Santos', 'Maan', 'Nielson', 'December Avenue', 'Zsa Zsa', 'Anna', 'Chic', 'Emin', 'Finn',
                'Jack Johnson', 'Journey', 'Pat Carroll', 'Voyage', 'Everclear', 'Faster', 'Ghost Town', 'Last Child',
                'Richter', 'Terror', 'Bleacher', 'Crush', 'The Good Life', 'Redman', 'Free', 'Hannes', 'Dion', 'Dixie',
                'Elizabeth', 'Outlaws', 'Sabaton', 'Emigration', 'Exodus', '1914', 'Archduke Franz Ferdinand of Austria',
                'Bohemia', 'Bóbr', 'Dragon Ball Super', 'Trunks (Dragon Ball)', 'Ferre Gola', 'Whirr (band)', 
                'Gunna', 'Volume Ten', 'Bausa', 'Third Strike', 'rizza', 'Horse Head', 'Virtual self', 
                'Dabin', 'Olson', 'Captain Murphy', 'Amy MacDonald']
                 
# Remove every listed title from the dictionary; titles that are not
# (or no longer) present are simply skipped.
for title in list_deleted:
    if title in genius_list_dic:
        del genius_list_dic[title]

In [12]:
# Number of page titles left after cleaning
len(genius_list_dic)

10191

### Save result

In [13]:
# Write the cleaned title dictionary to disk as JSON.
with open('genius_list_enriched_cleaned.json', 'w') as out_file:
    out_file.write(json.dumps(genius_list_dic))

In [14]:
# Read the cleaned JSON file back into `load`.
with open('genius_list_enriched_cleaned.json') as in_file:
    load = json.loads(in_file.read())

In [15]:
# Number of titles in the reloaded file
len(load)

10191

In [16]:
# Print every remaining page title, one per line.
for title in load:
    print(title)

ABBA
AC/DC
Adele
Alec Benjamin
Die Länderbahn
Alice in Chains
alt-J
Aminé (rapper)
Amy Winehouse
Andrew Lloyd Webber
Ant Wan
Arashi
Arctic Monkeys
Ariana Grande
ASAP Ferg
ASAP Rocky
Ashnikko
Aurora (singer)
A1 x J1
Aaliyah
AaRON
Aaron Cole
Aaron Keyes
Aaron Lewis
Aaron Smith
Aaron Sorkin
Aaron West and the Roaring Twenties
Abida Parveen
Above and Beyond
Abra
abracadabra
Absence
Ab-Soul
Abstract
The Acacia Strain
The Academic
Aca Lukas
Ace Hood
Ace of Base
Ace of Hearts
A.Chal
Achille Lauro
187 Strassenbande
Acid Bath
Action Bronson
Adam and the Ants
Adam Calhoun
Adam Jensen
Adam Lambert
Adam Levine
Adam Mansbach
Adam Sandler
Addison Rae
Adekunle Gold
Adel
Adelitas Way
Adel Tawil
Ademo
Vossi Bop
Adie
Aditi Paul
Adora
Adrianne Lenker
Adriano Celentano
Adult Mom
Aerosmith
Aesop Rock
aespa
Afroman
Aga B
Against Me!
Agalloch
A. G. Cook
Agnes
Agnes Obel
Agnetha Fältskog
Agnostic Front
The Agonist
Agust D
a-ha
Aha Gazelle
Ahmet Kaya
Aidonia
Ailee
Aileen Quinn
Aimee Mann
Aimer
The Airborne Tox

Emotional Oranges
The Emotions
Emperor X
Empire! Empire! (I Was a Lonely Estate)
Empress Of
Empyrium
Emre Aydın
Enchantment
End zone
Enemy Inside
Engenheiros do Hawaii
England Dan & John Ford Coley
Enigma (rapper)
Ñengo Flow
Enhypen
Enema
European Union Agency for Cybersecurity
Enjoy
Ennio Morricone
Enrico Macias
Enrique Iglesias
Ensi
Ensiferum
Enterprise Earth
Enter Shikari
Enur
En Vogue
Enya
EPMD
Eppu Normaali
Era Istrefi
Eraserheads
Erasure
Irfan (name)
Erica Banks
Eric B. & Rakim
Eric Bellinger
Eric Benét
Eric Bibb
Eric Bogle
Eric Carmen
Eric Church
Eric Clapton
Eric Idle
Eric Johnson
Erick Sermon
Erick Arc Elliott
Eric Nam
Eric Prydz
Eric Reprid
Eric Roberson
Eric Whitacre
Erigga
Erika de Casier
Erika Jayne
Erik Lundin
Erik & Kriss
Erik Petersen
Erik Santos
Erkan Oğur
Erkin Koray
Ernest Tubb
Ernst Busch
Erol Evgin
Eros Ramazzotti
E-Rotic
Erra
Erste Allgemeine Verunsicherung
Erykah Badu
Eryn Allen Kane
Escape the Fate
E Sens
Esham
Eshon Burgundy
Eskimo Callboy
Eskin
Esko
Eskorbuto


The Irrepressibles
Irving Berlin
Irwin Goodman
Isaac Dunbar
Isaac Gracie
Isaac Hayes
The Isaacs
Isaac Waddington
Isabel Allende
Isabela Merced
Isabella Crovetti
Isabelle Boulay
Isabel Pantoja
Isac Elliot
Isadora Pompeo
Isak Danielson
I See Stars
I Set My Friends on Fire
International_Students_of_History_Association
IshDARR
Icelandic króna
Islamic Force
The Island, Croydon
Isles & Glaciers
The Isley Brothers
Isley-Jasper-Isley
Ismael Rivera
Ismael Serrano
Ismo
Ismo Alanko
Ison & Fille
Ison
Israel Houghton
Israel Kamakawiwoʻole
Israel Nash
Israel Vibration
Issa Gold
Issam
Issa Twaimz
Isyana Sarasvati
ItaloBrothers
Italo Calvino
Itchy (band)
The Itchyworms
I the Mighty
Kanako Itō
It Looks Sad.
Ivana
Ivan & Alyosha
Ivan Graziani
Ivano Fossati
Ivan Rebroff
Ivete Sangalo
Ivor Cutler
Ivorian Doll
Ivor Novello
Ivory Hours
Hedera
Ivy Adara
Ivy Levan
The Ivy
Ivy Queen
Ivy Sole
Iwan Fals
iwrestledabearonce
Iyeoka Okoawo
Iz*One
Izzy Bizu
Izzy Camina
Izzy Stradlin
Jack Harlow
Jaden
James Blake
Jay-

Mister V
Mr. Vegas
MisterWives
Mister You
Misty Edwards
MKTO
Mónica Naranjo
Mobb Deep
Moby
Model
Modern Baseball
The Modern Lovers
Modern Talking
Modjo
Mo-Do
Mogwai
Moha La Squale
Mohammed Rafi
Moha
Mohit Chauhan
Moira Dela Torre
MOJOFLY
Elephant_in_the_room
Mokoma
The Moldy Peaches
Moli
Molly Nilsson
Molly Sandén
Molo
Mom_Jeans
Monaleo
Monali Thakur
Dardan (rapper)
Money Boy
Money Man
Monica Zetterlund
The Monkees
Mon Laferte
Mons
Monsieur Periné
Montana of 300
Montell Jordan
Montez
Monty
The Moody Blues
Moonchild Sanelly
Moose Blood
Track Star (song)
M.O.P.
Murad
Morbid Angel
Morcheeba
Morissette (singer)
Moro
Morphine
Morray
Morrissey
Morrisson
morten
Mor ve Ötesi
Mosaic MSC
Moscow Death Brigade
Moses Sumney
MoStack
Mother Goose Club
Mother Mother
The Mothers of Invention
Motion City Soundtrack
Motionless in White
Motive
General Motors Motorama
Motörhead
Mott the Hoople
The Mountain Goats
Mount Eerie
Mowgli
Mozzik
Mozzy
Müslüm Gürses
Mount Joy
Mötley Crüe
Muddy Waters
Mudi
Mudvayne


Roger Miller
Roger Taylor
Roger Waters
Roger Whittaker
Rohff
Roi Heenok
Ro James
Roky Erickson
Rolf Harris
Rolf Zuckowski
Rollins Band
Romain Virgo
The Romantics
Romeo Santos
Roméo Elvis
Ronan Keating
RondoNumbaNine
The Ronettes
Roney
Ron Kenoly
Ronnie Flex
Ronnie Milsap
Ronny J
Ron Pope
Ron Suno
Roomies
Roo Panes
Roopkumar Rathod
Roosevelt
The Roots
Ro Ransom
Rory Gallagher
Ros Barber
Rosemary Clooney
Rosenfeld
Rose Royce
Rosie Tucker
Ross Lynch
Rostam
Rota
Rotimi
Rotting Christ
Rowdy Rebel
Roxen
Roxette
Roxy Music
Royal & the Serpent
Royal Republic
Roy Blair
Royce da 5'9"
Roy Jones Jr.
Roy Orbison
Roy Woods
RSAC
The Rubberbandits
Reuben
The Rubens
Rubio
Rubi Rose
Ruby Fields
Ruby Ibarra
Rucci
Rucka Rucka Ali
Rudimental
Rudimentary Peni
Rudy Francisco
Rudy Ray Moore
Rufus Wainwright
Ruger Hauer
The Rumjacks
The Runaways
Run-DMC
Runrig
Run the Jewels
Runtown
RuPaul
Rupert Holmes
Rural internet
Russell Dickerson
Russ Millions
Rusted Root
Ruston Kelly
Ruten
Ruth B.
recreational vehicle
R

What So Not
Wheatus
Wheel
Ben Hoffman
When Chai Met Toast
When Saints Go Machine
Whethan
Whigfield
While She Sleeps
Whiskey Myers
Whiskeytown
The Whispers
The White Buffalo
Whitechapel
White Cross
White Denim
White Fence
White Heart
White Lion
White Reaper
White ring
White Shoes & The Couples Company
Whitesnake
The Whitest Boy Alive
The Whitest Kids U' Know
White Swan
White Town
White widow
Whitewood
Whitey
Whitey Morgan and the 78's
The Whitlams
Whitney Avalon
Whodini
Lets Link
WhoMadeWho
Whores (band)
Who See
Wiatr
Wicca Phase Springs Eternal
Wicked Wisdom
Widespread Panic
Guido
Widowspeak
wifisfuneral
The Wiggles
Wig Wam
Wiki
Wilco
Wild
Wild Beasts
Wild Belle
Wild card
Wild cherry
Wild child
Wilderado
Bear Rinehart
Wildes
The Wild Feathers
Wildflower
Wild Nothing
Wild One
The Wild Party
The Wild Reeds
The Wild Swans
Wiley
Wilfred Owen
Wilhelminism
A Wilhelm Scream
Wilki
Wilkinson
Willa Cather
Willam Belli
Willamette Stone
Fraser Anning
Will Downing
Willem
Willemijn Verkaik
Will Hoge

Quakers (band)
Quest (band)
Quicksand (American band)
The Quill (band)
Quimby (band)
Quincy (band)
R5 (band)
Racoon (band)
RAM (band)
Ramirez (band)
Rancid (band)
Randy (band)
Real Estate (band)
Real Friends (band)
Rebelution (band)
Red (band)
Redbone (band)
Red Velvet (group)
Rehab (band)
The Replacements (band)
Ride (band)
Rings of Saturn (band)
Riverside (band)
Rodriguez (band)
Rome (band)
The Rose (band)
Roxen (band)
Royal Blood (band)
Rufus (band)
Running Wild (band)
Rush (band)
Slipknot (band)
System_of_a_Down
Sabaton (band)
Sade (band)
Santana (band)
Santiano (band)
Say Anything (band)
SCH (band)
Scooter (band)
The Score (band)
Scorpions (band)
SDP (duo)
Self Esteem (musician)
Set It Off (band)
SFDK (band)
Shellac (band)
Sid (band)
Silk (group)
Silverstein (band)
Skillet (band)
Slash (musician)
Sleep (band)
Sleigh Bells (band)
Smokey Mountain (band)
Smokie (band)
Snail Mail (musician)
Sodom (band)
Solomon (band)
Sparks (band)
The Spirit of the Beehive (band)
Spoon (band)
Squeeze

In [17]:
# Number of entries in the removal list (repeated titles are counted each time)
len(list_deleted)

1250