In [166]:
import pandas as pd
import re
import urllib.request
from bs4 import BeautifulSoup
import json
import time
import spacy

from __future__ import unicode_literals, print_function
import random
from pathlib import Path

import math
from collections import Counter

import datetime

import fuzzyset

In [174]:
def getArtistDetailsFromAZ(p_artist_name_bb):
    p_artist_group_az = ''
    p_artist_name_az = ''
    p_artist_url_az = ''
    p_accuracy = 0
    p_match_count = 0
    p_index_set = ''
    
    t_artist_name_bb = removeSingleCharStopWords(addSpaceAfterDot(p_artist_name_bb))
    
    for index, row in az_artist_data.iterrows():
        t_accuracy = 0
        t_compare_result = []
        
        t_artist_name_az = row['name_words']
        
        if len(t_artist_name_az) > 0:
            t_compare_result = getCosine(t_artist_name_az, t_artist_name_bb)
            t_accuracy = round(len(t_compare_result)/len(t_artist_name_bb), 2)
        else:
            t_accuracy = 0
        
        if t_accuracy > p_accuracy and t_accuracy != 0:
            p_match_count = 1
            p_accuracy = t_accuracy
            p_artist_group_az = row['artist_group']
            p_artist_name_az = row['artist_name']
            p_artist_url_az = row['artist_url']
            p_index_set = str(index + 1)
        elif t_accuracy == p_accuracy and t_accuracy != 0:
            p_match_count += 1
            p_artist_group_az = p_artist_group_az + '|' + row['artist_group']
            p_artist_name_az = p_artist_name_az + '|' + row['artist_name']
            p_artist_url_az = p_artist_url_az + '|' + row['artist_url']
            p_index_set = p_index_set + '|' + str(index + 1)
    
    return [p_artist_group_az, p_artist_name_az, p_artist_url_az, p_accuracy, p_index_set, p_match_count]

def generateAutoIDforDataFrame(df, df_column):
    df.insert(0, df_column, range(1, 1 + len(df)))
    return df

def compareSets(p_set_to_compare_with, p_set_to_compare):
    return pd.Index(p_set_to_compare_with).intersection(pd.Index(p_set_to_compare))

def removeSpecialCharacter(param):
    return re.sub('\W+', ' ', param)

def removeSingleCharacterWord(param):
    return list(filter(lambda x: len(x) > 1, param))

def addSpaceAfterDot(param):
    return re.sub('\.+', '. ', param).replace('  ', ' ')

def removeSingleCharStopWords(param):
    doc = nlp(param)
    
    wordset = []
    
    for token in doc:
        if len(token.text) > 1 and not token.is_stop:
            wordset.append(token.lower_)
    return wordset

def removeDuplicate(param):
    result = []
    for w in param:
        if w not in result:
            result.append(w)
    return result

def intersection(list_to_compare=[], list_set_to_compare_with=[]):
    # Returns a list (same lenght as list_set_to_compare_with) having multiple matched elements
    result_list = [list(filter(lambda x: x in list_to_compare, sublist)) for sublist in list_set_to_compare_with]
    
    # Get the max length of sub-list from the return list
    result_list_max = max(result_list, key=len)
    
    # Get all the index list based on result_list_max
    index_list = []
    if len(result_list_max) != 0:
        index_list = [index for index, row in enumerate(result_list) if row == result_list_max]
    
    return index_list, len(result_list_max)

def getCosine(vec1, vec2):
    common = set(vec1.keys()) & set(vec2.keys())
    numerator = sum([vec1[x] * vec2[x] for x in common])

    sum1 = sum([vec1[x]**2 for x in vec1.keys()]) 
    sum2 = sum([vec2[x]**2 for x in vec2.keys()]) 
    denominator = math.sqrt(sum1) * math.sqrt(sum2)
   
    if not denominator:
        return 0.0 
    else:
        return float(numerator) / denominator

print('Loaded Functions!')

Loaded Functions!


In [168]:
az_artist_data = pd.read_csv('data/az_artist_data.csv')
bb_artist_data = pd.read_csv('data/bb_artist_data.csv')

az_artist_data = generateAutoIDforDataFrame(az_artist_data, 'artist_id_az')

In [169]:
domain_az = 'https://www.azlyrics.com/'
domain_bb = 'https://www.billboard.com/'

artist_data = bb_artist_data\
    .loc[bb_artist_data['artist_category'].isin(['Artist'])]\
    .drop(columns=['artist_category'])\
    .rename(index=str, columns={
        'artist_id': 'artist_id_bb',
        'artist_url': 'artist_url_bb', 
        'artist_name': 'artist_name_bb'})\
    
artist_data = pd.merge(artist_data, az_artist_data, how='left', 
        left_on=['artist_name_bb'], right_on=['artist_name'])\
    .rename(index=str, columns={
        'artist_group_x': 'artist_group_bb',
        'artist_group_y': 'artist_group_az', 
        'artist_url': 'artist_url_az',
        'artist_name': 'artist_name_az'})

artist_data.head()

Unnamed: 0,artist_group_bb,artist_id_bb,artist_name_bb,artist_url_bb,artist_id_az,artist_group_az,artist_name_az,artist_url_az
0,A,275128.0,A B C & D of Boogie Woogie,/artist/275128/b-c-d-boogie-woogie,,,,
1,A,275133.0,A Band of Bees,/artist/275133/band-bees,,,,
2,A,6824928.0,A Lu A Zhuo,/artist/6824928/a-lu-a-zhuo,,,,
3,A,7665908.0,A Niu,/artist/7665908/a-niu,,,,
4,A,1481643.0,A Plus,/artist/1481643/a-plus,,,,


In [170]:
az_artist_data.head()

Unnamed: 0,artist_id_az,artist_group,artist_name,artist_url
0,1,A,A1,a/a1.html
1,2,A,A,a/a.html
2,3,A,Aaliyah,a/aaliyah.html
3,4,A,"Aalto, Saara",s/saaraaalto.html
4,5,A,Aaradhna,a/aaradhna.html


In [171]:
nlp = spacy.load('en_core_web_sm')
words = []

for index, row in az_artist_data.iterrows():
    # t_artist_name_az = removeSingleCharStopWords(addSpaceAfterDot(row['artist_name']))
    t_artist_name_az = removeDuplicate(removeSingleCharStopWords(row['artist_name']))
    words.append(t_artist_name_az)

print('Created Word List!')

Created Word List!


In [172]:
word = 'America The Beautiful'
w = removeDuplicate(removeSingleCharStopWords(word))
[l, n] = intersection(w, words)
print([words[l[i]] for i in range(len(l))], n, w)

[['beautiful', 'south', 'the'], ['world', 'is', 'beautiful', 'place', 'am', 'no', 'longer', 'afraid', 'to', 'die', 'the']] 2 ['america', 'the', 'beautiful']


In [175]:
current_count = 1

for index, row in artist_data.iterrows():
    # artist_name_bb = removeSingleCharStopWords(addSpaceAfterDot(row['artist_name_bb']))
    artist_name_bb = removeDuplicate(removeSingleCharStopWords(row['artist_name_bb']))
    artist_name_az = row['artist_name_az']
    
    if pd.isna(artist_name_az):
        [match_index, match_index_lenght]  = intersection(artist_name_bb, words)
        if len(match_index) >= 1:
            print('IN:', row['artist_name_bb'], ' > ', artist_name_bb)
            print('OUT:', [words[match_index[i]] for i in range(len(match_index))])
            print(match_index_lenght/len(artist_name_bb))
            print('-------------')
    
    current_count += 1
    
    if current_count > 1000:
        break

print('Process Matching - Done!')

IN: A B C & D of Boogie Woogie  >  ['boogie', 'woogie']
OUT: [['boogie'], ['boogie', 'wit', 'da', 'hoodie']]
0.5
-------------
IN: A Band of Bees  >  ['band', 'bees']
OUT: [['alex', 'band'], ['allman', 'brothers', 'band', 'the'], ['average', 'white', 'band'], ['band', 'aid', '20'], ['band', 'aid', '30'], ['band', 'alex'], ['band', 'of', 'horses'], ['band', 'of', 'skulls'], ['band', 'perry', 'the'], ['band', 'the'], ['captain', 'beefheart', 'the', 'magic', 'band'], ['casey', 'donahew', 'band'], ['charlie', 'daniels', 'band', 'the'], ['climax', 'blues', 'band'], ['da', 'band'], ['dave', 'matthews', 'band'], ['david', 'crowder', 'band'], ['derek', 'trucks', 'band', 'the'], ['eli', 'young', 'band'], ['gap', 'band', 'the'], ['graham', 'colton', 'band'], ['greg', 'kihn', 'band'], ['james', 'barker', 'band'], ['jeff', 'healey', 'band', 'the'], ['j.', 'geils', 'band', 'the'], ['john', 'cafferty', 'the', 'beaver', 'brown', 'band'], ['john', 'fred', 'his', 'playboy', 'band'], ['josh', 'abbott', 

IN: Aaron Aedy  >  ['aaron', 'aedy']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', 'aaron'], ['watson', 'aaron']]
0.5
-------------
IN: Aaron Barrett  >  ['aaron', 'barrett']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', '

IN: Aaron Davidson  >  ['aaron', 'davidson']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', 'aaron'], ['watson', 'aaron']]
0.5
-------------
IN: Aaron Dilloway  >  ['aaron', 'dilloway']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['

IN: Aaron Hemphill  >  ['aaron', 'hemphill']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', 'aaron'], ['watson', 'aaron']]
0.5
-------------
IN: Aaron Huffman  >  ['aaron', 'huffman']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['ti

IN: Aaron Nelson  >  ['aaron', 'nelson']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', 'aaron'], ['watson', 'aaron']]
0.5
-------------
IN: Aaron Owens  >  ['aaron', 'owens']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', '

IN: Aaron Stuart  >  ['aaron', 'stuart']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', 'aaron'], ['watson', 'aaron']]
0.5
-------------
IN: Aaron Tate  >  ['aaron', 'tate']
OUT: [['aaron', 'carpenter'], ['aaron', 'carter'], ['aaron', 'cole'], ['aaron', 'fresh'], ['aaron', 'goodvin'], ['aaron', 'lewis'], ['aaron', 'lines'], ['aaron', 'neville'], ['aaron', 'shust'], ['aaron', 'tippin'], ['aaron', 'watson'], ['carpenter', 'aaron'], ['carter', 'aaron'], ['cole', 'aaron'], ['fresh', 'aaron'], ['goodvin', 'aaron'], ['lewis', 'aaron'], ['lines', 'aaron'], ['neville', 'aaron'], ['shust', 'aaron'], ['tippin', 'aa

IN: Ace Enders  >  ['ace', 'enders']
OUT: [['ace'], ['ace', 'family', 'the'], ['ace', 'frehley'], ['ace', 'hood'], ['ace', 'of', 'base'], ['frehley', 'ace']]
0.5
-------------
IN: Ace Kefford  >  ['ace', 'kefford']
OUT: [['ace'], ['ace', 'family', 'the'], ['ace', 'frehley'], ['ace', 'hood'], ['ace', 'of', 'base'], ['frehley', 'ace']]
0.5
-------------
IN: Ace of Spades  >  ['ace', 'spades']
OUT: [['ace'], ['ace', 'family', 'the'], ['ace', 'frehley'], ['ace', 'hood'], ['ace', 'of', 'base'], ['frehley', 'ace']]
0.5
-------------
IN: Acey Slade  >  ['acey', 'slade']
OUT: [['slade']]
0.5
-------------
IN: Acid Maria  >  ['acid', 'maria']
OUT: [['ida', 'maria'], ['kristina', 'maria'], ['lynn', 'maria'], ['maria'], ['maria', 'lynn'], ['maria', 'mckee'], ['maria', 'mena'], ['maria', 'taylor'], ['taylor', 'maria']]
0.5
-------------
IN: Across the Atlantic  >  ['across', 'atlantic']
OUT: [['atlantic', 'starr'], ['chase', 'atlantic'], ['twin', 'atlantic']]
0.5
-------------
IN: Ad Wammes  >  ['

IN: Adam Ferry  >  ['adam', 'ferry']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam Ficek  >  ['adam', 'ficek']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam Fisher  >  ['adam', 'fisher']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'

IN: Adam Lehan  >  ['adam', 'lehan']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam Levy  >  ['adam', 'levy']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam Lewis  >  ['adam', 'lewis']
OUT: [['aaron', 'lewis'], ['anthony', 'lewis'], ['blake', 'lewis'], ['capaldi', 'lewis'], ['dean', 'lewis'], ['donna', 'lewis'], ['glenn', 'lewis'], 

IN: Adam Polakoff  >  ['adam', 'polakoff']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam Popowitz  >  ['adam', 'popowitz']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam Powell  >  ['adam', 'powell']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['ada

IN: Adam Wentworth  >  ['adam', 'wentworth']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam White  >  ['adam', 'white']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['adam', 'levine'], ['adam', 'saleh'], ['adam', 'sandler'], ['friedman', 'adam'], ['gontier', 'adam'], ['gregory', 'adam'], ['jensen', 'adam'], ['lambert', 'adam'], ['levine', 'adam'], ['saleh', 'adam'], ['sandler', 'adam']]
0.5
-------------
IN: Adam Williams  >  ['adam', 'williams']
OUT: [['adam', 'ant'], ['adam', 'friedman'], ['adam', 'gontier'], ['adam', 'gregory'], ['adam', 'jensen'], ['adam', 'lambert'], ['ada

IN: Adriel Garcia  >  ['adriel', 'garcia']
OUT: [['adriel', 'favela'], ['favela', 'adriel']]
0.5
-------------
IN: Adrienne Davies  >  ['adrienne', 'davies']
OUT: [['adrienne', 'bailon'], ['bailon', 'adrienne']]
0.5
-------------
IN: Adventures of Stevie V  >  ['adventures', 'stevie']
OUT: [['appleton', 'stevie'], ['brock', 'stevie'], ['hoang', 'stevie'], ['nicks', 'stevie'], ['stevie', 'appleton'], ['stevie'], ['stevie', 'brock'], ['stevie', 'hoang'], ['stevie', 'nicks'], ['stevie', 'ray', 'vaughan'], ['stevie', 'stone'], ['stevie', 'wonder'], ['vaughan', 'stevie', 'ray'], ['wonder', 'stevie']]
0.5
-------------
IN: Afilia Saga  >  ['afilia', 'saga']
OUT: [['saga']]
0.5
-------------
IN: Afrika Bambaataa & The Soulsonic Force  >  ['afrika', 'bambaataa', 'the', 'soulsonic', 'force']
OUT: [['acacia', 'strain', 'the'], ['academic', 'the'], ['academy', 'is', '...', 'the'], ['ace', 'family', 'the'], ['aces', 'the'], ['afghan', 'whigs', 'the'], ['afters', 'the'], ['after', 'the', 'burial'],

IN: Agent M  >  ['agent']
OUT: [['agent', 'orange'], ['sleeper', 'agent']]
1.0
-------------
IN: Agust  >  ['agust']
OUT: [['agust']]
1.0
-------------
IN: Agustin Sanchez  >  ['agustin', 'sanchez']
OUT: [['jessica', 'sanchez'], ['leroy', 'sanchez'], ['sanchez', 'jessica'], ['sanchez', 'leroy']]
0.5
-------------
IN: Ahaguna G. Sun  >  ['ahaguna', 'g.', 'sun']
OUT: [['empire', 'of', 'the', 'sun'], ['hail', 'the', 'sun'], ['mod', 'sun'], ['red', 'sun', 'rising'], ['seven', 'and', 'the', 'sun'], ['sun', 'diego'], ['sun', 'kil', 'moon']]
0.3333333333333333
-------------
IN: Ahaguna Sun  >  ['ahaguna', 'sun']
OUT: [['empire', 'of', 'the', 'sun'], ['hail', 'the', 'sun'], ['mod', 'sun'], ['red', 'sun', 'rising'], ['seven', 'and', 'the', 'sun'], ['sun', 'diego'], ['sun', 'kil', 'moon']]
0.5
-------------
IN: Ahn So Hee  >  ['ahn', 'so', 'hee']
OUT: [['ahn', 'priscilla'], ['priscilla', 'ahn']]
0.3333333333333333
-------------
IN: Ai Fei & Sdanny Lee   >  ['ai', 'fei', 'sdanny', 'lee']
OUT: [['

IN: Al Cisneros  >  ['al', 'cisneros']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yankovic'], ['yankovic', 'weird', 'al']]
0.5
-------------
IN: Al Connelly  >  ['al', 'connelly']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yankovic'], ['yankovic', 'weird', 'al']]
0.5
-------------
IN: Al Contrera  >  ['al', 'contrera']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yankovic'], ['yankovic', 'weird', 'al']]
0.5
-------------
IN: Al Cooper  >  ['al', 'cooper']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yan

IN: Al Strong  >  ['al', 'strong']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yankovic'], ['yankovic', 'weird', 'al']]
0.5
-------------
IN: Al Tariq  >  ['al', 'tariq']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yankovic'], ['yankovic', 'weird', 'al']]
0.5
-------------
IN: Al Tharp  >  ['al', 'tharp']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yankovic'], ['yankovic', 'weird', 'al']]
0.5
-------------
IN: Al Ways  >  ['al', 'ways']
OUT: [['al', 'green'], ['al', 'james'], ['al', 'jarreau'], ['al', 'stewart'], ['green', 'al'], ['james', 'al'], ['jarreau', 'al'], ['stewart', 'al'], ['weird', 'al', 'yankovic'], ['yankovic'

IN: Alan Howard  >  ['alan', 'howard']
OUT: [['adina', 'howard'], ['ben', 'howard'], ['howard', 'adina'], ['howard', 'ben'], ['howard', 'jones'], ['howard', 'miki'], ['jones', 'howard'], ['miki', 'howard']]
0.5
-------------
IN: Alan Hull  >  ['alan', 'hull']
OUT: [['alan', 'jackson'], ['alan', 'parsons', 'project', 'the'], ['alan', 'walker'], ['gregory', 'alan', 'isakov'], ['isakov', 'gregory', 'alan'], ['jackson', 'alan'], ['walker', 'alan']]
0.5
-------------
IN: Alan Hunter  >  ['alan', 'hunter']
OUT: [['alan', 'jackson'], ['alan', 'parsons', 'project', 'the'], ['alan', 'walker'], ['gregory', 'alan', 'isakov'], ['isakov', 'gregory', 'alan'], ['jackson', 'alan'], ['walker', 'alan']]
0.5
-------------
IN: Alan Jackman  >  ['alan', 'jackman']
OUT: [['alan', 'jackson'], ['alan', 'parsons', 'project', 'the'], ['alan', 'walker'], ['gregory', 'alan', 'isakov'], ['isakov', 'gregory', 'alan'], ['jackson', 'alan'], ['walker', 'alan']]
0.5
-------------
IN: Alan Jones  >  ['alan', 'jones']
OU

IN: Alan Silson  >  ['alan', 'silson']
OUT: [['alan', 'jackson'], ['alan', 'parsons', 'project', 'the'], ['alan', 'walker'], ['gregory', 'alan', 'isakov'], ['isakov', 'gregory', 'alan'], ['jackson', 'alan'], ['walker', 'alan']]
0.5
-------------
IN: Alan Sparhawk  >  ['alan', 'sparhawk']
OUT: [['alan', 'jackson'], ['alan', 'parsons', 'project', 'the'], ['alan', 'walker'], ['gregory', 'alan', 'isakov'], ['isakov', 'gregory', 'alan'], ['jackson', 'alan'], ['walker', 'alan']]
0.5
-------------
IN: Alan Spenner  >  ['alan', 'spenner']
OUT: [['alan', 'jackson'], ['alan', 'parsons', 'project', 'the'], ['alan', 'walker'], ['gregory', 'alan', 'isakov'], ['isakov', 'gregory', 'alan'], ['jackson', 'alan'], ['walker', 'alan']]
0.5
-------------
IN: Alan St. Clair  >  ['alan', 'st.', 'clair']
OUT: [['alan', 'jackson'], ['alan', 'parsons', 'project', 'the'], ['alan', 'walker'], ['gregory', 'alan', 'isakov'], ['isakov', 'gregory', 'alan'], ['jackson', 'alan'], ['walker', 'alan']]
0.3333333333333333


IN: Alberto Guerrero Lopez  >  ['alberto', 'guerrero', 'lopez']
OUT: [['jennifer', 'lopez'], ['lopez', 'jennifer'], ['lopez', 'sasha'], ['sasha', 'lopez']]
0.3333333333333333
-------------
IN: Alberto Iglesias  >  ['alberto', 'iglesias']
OUT: [['enrique', 'iglesias'], ['iglesias', 'enrique'], ['iglesias', 'julio'], ['julio', 'iglesias']]
0.5
-------------
IN: Albeth Paris  >  ['albeth', 'paris']
OUT: [['hilton', 'paris'], ['paris'], ['paris', 'hilton'], ['paris', 'sarina'], ['paris', 'twila'], ['sarina', 'paris'], ['twila', 'paris']]
0.5
-------------
IN: Albeth Paris  >  ['albeth', 'paris']
OUT: [['hilton', 'paris'], ['paris'], ['paris', 'hilton'], ['paris', 'sarina'], ['paris', 'twila'], ['sarina', 'paris'], ['twila', 'paris']]
0.5
-------------
IN: Aldo Sarabia  >  ['aldo', 'sarabia']
OUT: [['aldo', 'nova'], ['nova', 'aldo']]
0.5
-------------
IN: Alec Burke  >  ['alec', 'burke']
OUT: [['alec', 'benjamin'], ['benjamin', 'alec']]
0.5
-------------
IN: Alec Empire  >  ['alec', 'empire

IN: Alex Blackwell  >  ['alex', 'blackwell']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Bleeker  >  ['alex', 'bleeker']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['

IN: Alex Conti  >  ['alex', 'conti']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Cooper  >  ['alex', 'cooper']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'go

IN: Alex Garcia Rivera  >  ['alex', 'garcia', 'rivera']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.3333333333333333
-------------
IN: Alex Gaskarth  >  ['alex', 'gaskarth']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['ale

IN: Alex Grullon  >  ['alex', 'grullon']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Hassilev  >  ['alex', 'hassilev']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['al

IN: Alex Kent  >  ['alex', 'kent']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Kentucky  >  ['alex', 'kentucky']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', '

IN: Alex Lora  >  ['alex', 'lora']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Lustig  >  ['alex', 'lustig']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonz

IN: Alex Onizawa  >  ['alex', 'onizawa']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Pappas  >  ['alex', 'pappas']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex',

IN: Alex Schrodt  >  ['alex', 'schrodt']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Sharpe  >  ['alex', 'sharpe']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex',

IN: Alex Tenas  >  ['alex', 'tenas']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex To  >  ['alex', 'to']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'],

IN: Alex Woodrow  >  ['alex', 'woodrow']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', 'gonzaga'], ['alex', 'goot'], ['alex', 'hepburn'], ['alex', 'lahey'], ['alex', 'mattson'], ['alex', 'mica'], ['alex', 'parks'], ['alex', 'sensation'], ['alex', 'sierra'], ['alex', 'turner'], ['alex', 'vargas'], ['band', 'alex'], ['cameron', 'alex'], ['campos', 'alex'], ['clare', 'alex'], ['day', 'alex'], ['gaudino', 'alex'], ['gonzaga', 'alex'], ['goot', 'alex'], ['hepburn', 'alex'], ['lahey', 'alex'], ['mattson', 'alex'], ['mica', 'alex'], ['nat', 'and', 'alex', 'wolff'], ['parks', 'alex'], ['turner', 'alex'], ['vargas', 'alex']]
0.5
-------------
IN: Alex Young  >  ['alex', 'young']
OUT: [['alex', 'aiono'], ['alex', 'band'], ['alex', 'cameron'], ['alex', 'campos'], ['alex', 'clare'], ['alex', 'da', 'kid'], ['alex', 'day'], ['alex'], ['alex', 'gaudino'], ['alex', '

IN: Alfonso Andre  >  ['alfonso', 'andre']
OUT: [['andre', 'nickatina'], ['andre', 'peter'], ['nickatina', 'andre'], ['peter', 'andre']]
0.5
-------------
IN: Alfonso Ayala  >  ['alfonso', 'ayala']
OUT: [['ayala', 'ramon'], ['ramon', 'ayala']]
0.5
-------------
IN: Alfonzo Smith  >  ['alfonzo', 'smith']
OUT: [['april', 'smith'], ['april', 'smith', 'and', 'the', 'great', 'picture', 'show'], ['caitlyn', 'smith'], ['canaan', 'smith'], ['corey', 'smith'], ['dallas', 'smith'], ['elliott', 'smith'], ['granger', 'smith'], ['jaden', 'smith'], ['jordan', 'smith'], ['jorja', 'smith'], ['kim', 'walker', 'smith'], ['michael', 'w.', 'smith'], ['mindy', 'smith'], ['patti', 'smith'], ['sam', 'smith'], ['smith', 'april'], ['smith', 'caitlyn'], ['smith', 'canaan'], ['smith', 'corey'], ['smith', 'dallas'], ['smith', 'elliott'], ['smith', 'granger'], ['smith', 'jaden'], ['smith', 'jordan'], ['smith', 'jorja'], ['smith', 'michael', 'w.'], ['smith', 'mindy'], ['smith', 'patti'], ['smith', 'westerns'], ['sm

IN: Alison Shaw  >  ['alison', 'shaw']
OUT: [['alison', 'gold'], ['alison', 'krauss'], ['alison', 'moyet'], ['alison', 'wonderland'], ['gold', 'alison'], ['krauss', 'alison'], ['moyet', 'alison'], ['wonderland', 'alison']]
0.5
-------------
IN: Alison Toews  >  ['alison', 'toews']
OUT: [['alison', 'gold'], ['alison', 'krauss'], ['alison', 'moyet'], ['alison', 'wonderland'], ['gold', 'alison'], ['krauss', 'alison'], ['moyet', 'alison'], ['wonderland', 'alison']]
0.5
-------------
IN: Alissa White Gluz  >  ['alissa', 'white', 'gluz']
OUT: [['average', 'white', 'band'], ['barry', 'white'], ['bryan', 'white'], ['drake', 'white'], ['great', 'white'], ['jack', 'white'], ['jim', 'white'], ['kalin', 'white'], ['karyn', 'white'], ['keisha', 'white'], ['matt', 'white'], ['motionless', 'in', 'white'], ['neverending', 'white', 'lights'], ['plain', 'white', "'s"], ['white', 'barry'], ['white', 'bryan'], ['white', 'buffalo', 'the'], ['white', 'drake'], ['white', 'jack'], ['white', 'jim'], ['white', 

IN: Allen Johnson  >  ['allen', 'johnson']
OUT: [['alexz', 'johnson'], ['brothers', 'johnson', 'the'], ['carolyn', 'dawn', 'johnson'], ['cody', 'johnson'], ['dwayne', 'johnson'], ['hobo', 'johnson'], ['jack', 'johnson'], ['jamey', 'johnson'], ['jillette', 'johnson'], ['johnson', 'alexz'], ['johnson', 'carolyn', 'dawn'], ['johnson', 'cody'], ['johnson', 'dwayne'], ['johnson', "le'andria"], ['johnson', 'lia', 'marie'], ['johnson', 'louisa'], ['johnson', 'robert'], ['johnson', 'syleena'], ["le'andria", 'johnson'], ['lia', 'marie', 'johnson'], ['louisa', 'johnson'], ['robert', 'johnson'], ['syleena', 'johnson']]
0.5
-------------
IN: Allen Kemp  >  ['allen', 'kemp']
OUT: [['allen', 'hoodie'], ['allen', 'kris'], ['allen', 'lily'], ['allen', 'stone'], ['andrew', 'allen'], ['hoodie', 'allen'], ['kris', 'allen'], ['lily', 'allen'], ['stone', 'allen']]
0.5
-------------
IN: Allen Lanier  >  ['allen', 'lanier']
OUT: [['allen', 'hoodie'], ['allen', 'kris'], ['allen', 'lily'], ['allen', 'stone'], 

IN: Alvaro Pava  >  ['alvaro', 'pava']
OUT: [['alvaro', 'soler'], ['soler', 'alvaro']]
0.5
-------------
IN: Alvin  >  ['alvin']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
1.0
-------------
IN: Alvin  >  ['alvin']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
1.0
-------------
IN: Alvin & the Chipmunks  >  ['alvin', 'chipmunks']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
1.0
-------------
IN: Alvin Cash & the Crawlers  >  ['alvin', 'cash', 'crawlers']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
0.3333333333333333
-------------
IN: Alvin Cash & The Registers  >  ['alvin', 'cash', 'the', 'registers']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
0.5
-------------
IN: Alvin Chea  >  ['alvin', 'chea']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
0.5
-------------
IN: Alvin Ewen  >  ['alvin', 'ewen']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
0.5
-------------
IN: Alvin Patterson  >  ['alvin', 'patterson']
OUT: [['alvin', 'and', 'the', 'chipmunks']]
0.5
-------------
IN: Alvin Pazant  >  ['al

IN: Amber Kuo  >  ['amber', 'kuo']
OUT: [['amber', 'mark'], ['amber', 'pacific'], ['amber', 'riley'], ['amber', 'rose'], ['amber', 'run'], ['mark', 'amber'], ['riley', 'amber'], ['rose', 'amber']]
0.5
-------------
IN: Amber Kuo & Cai Guoqing   >  ['amber', 'kuo', 'cai', 'guoqing']
OUT: [['amber', 'mark'], ['amber', 'pacific'], ['amber', 'riley'], ['amber', 'rose'], ['amber', 'run'], ['mark', 'amber'], ['riley', 'amber'], ['rose', 'amber']]
0.25
-------------
IN: Amber Ross  >  ['amber', 'ross']
OUT: [['amber', 'mark'], ['amber', 'pacific'], ['amber', 'riley'], ['amber', 'rose'], ['amber', 'run'], ['mark', 'amber'], ['riley', 'amber'], ['rose', 'amber']]
0.5
-------------
IN: Amber Webber  >  ['amber', 'webber']
OUT: [['amber', 'mark'], ['amber', 'pacific'], ['amber', 'riley'], ['amber', 'rose'], ['amber', 'run'], ['mark', 'amber'], ['riley', 'amber'], ['rose', 'amber']]
0.5
-------------
IN: Amedeo Pace  >  ['amedeo', 'pace']
OUT: [['change', 'of', 'pace']]
0.5
-------------
IN: Amee 

In [None]:
fz = fuzzyset.FuzzySet()
fz_data = az_artist_data['artist_name'].values
for l in fz_data:
    fz.add(l)

In [None]:
current_count = 1

for index, row in artist_data.iterrows():
    artist_name_bb = row['artist_name_bb']
    artist_name_az = row['artist_name_az']
    
    if pd.isna(artist_name_az):
        print('IN:', artist_name_bb)
        print('OUT:', fz.get(artist_name_bb))
        print('-------------')
    
    current_count += 1
    
    if current_count > 100:
        break

print('Process Matching - Done!')

In [None]:
artist_match_accuracy = []
artist_match_count = []
artist_index_set = []

current_count = 1
total_count = len(artist_data.index)

print('Started Processing: ', total_count, 'Records')

for index, row in artist_data.iterrows():
    artist_group_az = ''
    artist_name_az = ''
    artist_url_az = 0
    
    artist_name_bb = row['artist_name_bb']
    artist_name_az = row['artist_name_az']
    
    if pd.isna(artist_name_bb):
        artist_match_accuracy.append(0.00)
        artist_index_set.append('Null/Nan - Ignored')
        artist_match_count.append(0)
    else:
        if pd.isna(artist_name_az):
            [artist_group_az, artist_name_az, artist_url_az, accuracy, index_set, match_count]\
                = getArtistDetailsFromAZ(artist_name_bb)
            artist_data.at[index, 'artist_group_az'] = artist_group_az
            artist_data.at[index, 'artist_name_az'] = artist_name_az
            artist_data.at[index, 'artist_url_az'] = artist_url_az
            artist_match_accuracy.append(accuracy)
            artist_index_set.append(index_set)
            artist_match_count.append(match_count)
        else:
            artist_match_accuracy.append(2.00)
            artist_index_set.append('Direct Relational Match')
            artist_match_count.append(0)
    
    if (current_count % 100) == 0:
        print('Processed: ', current_count, ' / ', total_count)
    
    current_count += 1
    
    if current_count > 10:
        break
        
artist_data['artist_match_accuracy_az'] = artist_match_accuracy
artist_data['artist_index_set_az'] = artist_index_set
artist_data['artist_match_count_az'] = artist_match_count

print('Process Matching - Done!')

In [25]:
setlist = [{'Chloe', 'Adams,'}, {'Joel', 'Adams,'}, {'Adams,', 'Oleta'}, {'Peter', 'Bradley', 'Adams,'}]
set.intersection(*setlist)

{'Adams,'}