In [1]:
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import time

## Load Dataframes

In [4]:
df_artist_uri = pd.read_csv('Data/artist_uri.csv',sep='|')
df_artist_uri.head(5)

Unnamed: 0,org_artist_name,info_artist_uri,info_artist_name,info_followers_total,info_popularity
0,beyonce-knowles,6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0
1,asher-monroe,7nyLigsJ4LWLqVvMrSlQf7,Asher Monroe,53630.0,32.0
2,alice-on-the-roof,4M07FSqpxgqLfCOaX1WUei,Alice on the roof,22209.0,46.0
3,daliah-lavi,6az1ImuFcfXd1Rnzonmqau,Daliah Lavi,10575.0,37.0
4,borialis,2wXXMRH4VFDhS2fFE9swlz,Borialis,2645.0,28.0


In [5]:
df_lyrics = pd.read_csv('Data/raw_lyrics.csv',sep=',',index_col=False)
df_lyrics = df_lyrics.drop(columns='year')
df_lyrics = df_lyrics.drop(columns='index')
#df_lyrics['clean_song'] = df_lyrics['song'].apply(lambda x: x.replace('-',' '))
df_lyrics.head(5)
#df_lyrics.dtypes

Unnamed: 0,song,org_artist_name,genre,lyrics
0,ego-remix,beyonce-knowles,Pop,"Oh baby, how you doing?\nYou know I'm gonna cu..."
1,then-tell-me,beyonce-knowles,Pop,"playin' everything so easy,\nit's like you see..."
2,honesty,beyonce-knowles,Pop,If you search\nFor tenderness\nIt isn't hard t...
3,you-are-my-rock,beyonce-knowles,Pop,"Oh oh oh I, oh oh oh I\n[Verse 1:]\nIf I wrote..."
4,black-culture,beyonce-knowles,Pop,"Party the people, the people the party it's po..."


In [6]:
df_track_uri = pd.read_csv('Data/artist_album_track_uri.csv',sep='|')
df_track_uri.head(5)

Unnamed: 0,album_uri,artist_name,artist_uri,track_name,track_uri
0,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,balance (mufasa interlude),1RMvRv1tAvWXgYdH8DUdLX
1,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,BIGGER,4R2KJOgEUP2qO3re9BwIhs
2,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,the stars (mufasa interlude),50QREAhIciZoAiRNtyELEh
3,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,FIND YOUR WAY BACK,65kk9CAAqFI3LWBEhUwVqd
4,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,uncle scar (scar interlude),4KJ3NHQvLjA0hB2nv43cxD


## Find Matches

In [7]:
df_org_artist_name = df_artist_uri['org_artist_name']

df_spotify_artist_name = df_artist_uri['info_artist_name'].str.lower()

master_match = []


for x in range(len(df_org_artist_name)):
    a = fuzz.partial_ratio(df_org_artist_name[x],df_spotify_artist_name[x])
    master_match.append(a)
    


## Create Fuzzy Wuzzy Match Column in artist_uri df

In [8]:
df_artist_uri['fuzzy_wuzzy_match'] = master_match

## Only keep matches above 80

In [10]:
trim_df_artist_uri = df_artist_uri.loc[df_artist_uri['fuzzy_wuzzy_match']>=80]

## Merge datasets match on org_artist_name (info_artist_name) and lyrics.csv (artist)

In [13]:
df_complete = df_lyrics.merge(trim_df_artist_uri,left_on='org_artist_name',right_on='org_artist_name')
df_complete['term_org'] = df_complete['song'].apply(lambda x: x.replace('-',' '))
df_complete.sample(n=50)

Unnamed: 0,song,org_artist_name,genre,lyrics,info_artist_uri,info_artist_name,info_followers_total,info_popularity,fuzzy_wuzzy_match,term_org
89783,forged-in-iron-crowned-in-steel,grand-magus,Metal,All the prophets and deceivers\ntry to lead us...,7GTkjp7qsfmx8iJzBUQLmN,Grand Magus,34731.0,39.0,91,forged in iron crowned in steel
7283,a-simple-matter-of-conviction,bill-evans,Jazz,,4jXfFzeP66Zy67HM2mvIIF,Bill Evans,382435.0,64.0,90,a simple matter of conviction
173758,in-her-music-box,atmosphere,Hip-Hop,She had a bad dream in the back seat\nthe same...,1GAS0rb4L8VTPvizAx2O9J,Atmosphere,525547.0,63.0,100,in her music box
70392,juke-jam,chance-the-rapper,Pop,[Verse 1: Chance The Rapper]\nWe used to roll ...,1anyVhU62p31KFi8MEzkbf,Chance the Rapper,4792861.0,84.0,88,juke jam
91984,get-your-money-up,dmx,Hip-Hop,"Fuck if you want nigga, I gots to let you know...",1HwM5zlC5qNWhJtM00yXzG,DMX,1906944.0,73.0,100,get your money up
16535,try-to-save-your-s-ong,alejandro-sanz,Pop,"Dime, quiÃ©n va a salvarte a ti tus besos\nCua...",5sUrlPAHlS9NEirDB8SEbF,Alejandro Sanz,4132993.0,78.0,93,try to save your s ong
9429,staring-at-the-rude-boys,gallows,Rock,Its a small-minded world in the middle of a cr...,0LpwbxbT8dPiqUnul4ngzu,Gallows,53098.0,40.0,100,staring at the rude boys
168187,dutty-wine-gal,beenie-man,Hip-Hop,Stang brrrr steng ba dam ba dam\nFi di gal dem...,4L3GTE04bW5N7azA9QPhjA,Beenie Man,280574.0,62.0,90,dutty wine gal
211244,sad-boy,the-black-halos,Rock,Deep in your heart you can't be wrong\nLost in...,5hUckvxtUKCODb4zgGcAXL,The Black Halos,1304.0,14.0,87,sad boy
3879,i-don-t-care,danko-jones,Rock,I don't care if the world is about to blow up\...,7CGoviGsNXYmGOBkXk8dtW,Danko Jones,135266.0,56.0,91,i don t care


## Export Unique Artists for Get_AlbumURI.ipynb

In [14]:
trim_df_complete = df_complete[['info_artist_uri','info_artist_name']].drop_duplicates()
#trim_df_complete.to_csv('Data/artist_uri_fuzz.csv',sep='|',index=False)

## Org = df_lyrics columns - Use dataframe to search term in large album track uri

In [17]:
df_comp_org = df_complete[['info_artist_uri','song']]

## Create term column

In [18]:
df_comp_org['term'] = df_comp_org[['info_artist_uri', 'song']].apply(lambda x: ''.join(x), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


## track_uri = df_artist_uri columns - Use dataframe to be paired with df_comp_org term

In [21]:
df_comp_track_uri = df_track_uri[['artist_uri','track_name','track_uri']]

## Create term column

In [22]:
df_comp_track_uri['term'] = df_comp_track_uri[['artist_uri', 'track_name']].apply(lambda x: ''.join(x), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [23]:
df_comp_track_uri.head(10)

Unnamed: 0,artist_uri,track_name,track_uri,term
0,6vWDO969PvNqNYHIOW5v0m,balance (mufasa interlude),1RMvRv1tAvWXgYdH8DUdLX,6vWDO969PvNqNYHIOW5v0mbalance (mufasa interlude)
1,6vWDO969PvNqNYHIOW5v0m,BIGGER,4R2KJOgEUP2qO3re9BwIhs,6vWDO969PvNqNYHIOW5v0mBIGGER
2,6vWDO969PvNqNYHIOW5v0m,the stars (mufasa interlude),50QREAhIciZoAiRNtyELEh,6vWDO969PvNqNYHIOW5v0mthe stars (mufasa interl...
3,6vWDO969PvNqNYHIOW5v0m,FIND YOUR WAY BACK,65kk9CAAqFI3LWBEhUwVqd,6vWDO969PvNqNYHIOW5v0mFIND YOUR WAY BACK
4,6vWDO969PvNqNYHIOW5v0m,uncle scar (scar interlude),4KJ3NHQvLjA0hB2nv43cxD,6vWDO969PvNqNYHIOW5v0muncle scar (scar interlude)
5,6vWDO969PvNqNYHIOW5v0m,DON'T JEALOUS ME,1yvFoBp2Bq1ilD0518ZpQx,6vWDO969PvNqNYHIOW5v0mDON'T JEALOUS ME
6,6vWDO969PvNqNYHIOW5v0m,danger (young simba & young nala interlude),6Y6BbZoYe2mNLceeoUZdoT,6vWDO969PvNqNYHIOW5v0mdanger (young simba & yo...
7,6vWDO969PvNqNYHIOW5v0m,JA ARA E,6pdip6qgVJOI5JxqgbAlu6,6vWDO969PvNqNYHIOW5v0mJA ARA E
8,6vWDO969PvNqNYHIOW5v0m,run away (scar & young simba interlude),19omXUq2TcREoUoHqbVLTq,6vWDO969PvNqNYHIOW5v0mrun away (scar & young s...
9,6vWDO969PvNqNYHIOW5v0m,NILE,0lboDHM9hSR5j5CFlWDR9k,6vWDO969PvNqNYHIOW5v0mNILE


## Create master dictionary to hold artist_uri, term, and track_uri from df_complete

In [27]:
master_dictionary = {}

keys = df_artist_uri['info_artist_uri']

for key in keys:
    master_dictionary.update({key:[]})

## Test Dictionary

In [34]:
master_dictionary

{'6vWDO969PvNqNYHIOW5v0m': [],
 '7nyLigsJ4LWLqVvMrSlQf7': [],
 '4M07FSqpxgqLfCOaX1WUei': [],
 '6az1ImuFcfXd1Rnzonmqau': [],
 '2wXXMRH4VFDhS2fFE9swlz': [],
 '0ug84nvWi4PxvGIL52EZWr': [],
 '0gt8ziBNDt2u3UCOPE5CNx': [],
 '1L7EOA3HMDy6lOdZKuwedm': [],
 '22ojy4H4ZVpowC4lRRC8In': [],
 '3VQfHuqrRK1CNXR1V8PeR9': [],
 '3Z8Ab6lOrrOzIekUwxLxI0': [],
 '7hZjPEqwCy3BUPaxebd57q': [],
 '3O9bFJBTRddC4HOsZixhuw': [],
 '49eo9xE4yGzJLWkOndiODs': [],
 '1rT2DYzj3q6QnrxaqF4jnN': [],
 '5fBimwbrIjCqNTTlCFHzon': [],
 '4TAV6oFZ5ARdlybbpFAFri': [],
 '4UvhuGdFf75Cfan7lEU1J1': [],
 '6jFcoeKxHPBbCIgFjWm6bc': [],
 '12JKvbMl7cQqmtumTZTT3g': [],
 '0hU5urLse5h1Z0b4zQkovL': [],
 '77OlE8SkEHDkJczfswzDFo': [],
 '3kjuyTCjPG1WMFCiyc5IuB': [],
 '450o9jw6AtiQlQkHCdH6Ru': [],
 '7dSnChJjb0jdfulJsIijoC': [],
 '4x7gxsrTH3gThvSKZPPwaQ': [],
 '4uIdP3jwyR0xifCS2FYS3o': [],
 '1C12JVV5gbeImz20zxX53l': [],
 '2ibMjULyZPLJFpid0NOAGt': [],
 '2BQ6kU5WrbK8qncGLWYOtB': [],
 '37zKpwMQ7NFzk0MPR4FUJe': [],
 '2ykUp1K8tRaOUFd8vvcwXl': [],
 '4w3TFO

## Create list of columns to be appended to master dictionary

In [32]:
append_to_master = df_comp_track_uri[['artist_uri','track_name','track_uri']]

In [33]:
search_term_list = append_to_master.values.tolist()
search_term_list

[['6vWDO969PvNqNYHIOW5v0m',
  'balance (mufasa interlude)',
  '1RMvRv1tAvWXgYdH8DUdLX'],
 ['6vWDO969PvNqNYHIOW5v0m', 'BIGGER', '4R2KJOgEUP2qO3re9BwIhs'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'the stars (mufasa interlude)',
  '50QREAhIciZoAiRNtyELEh'],
 ['6vWDO969PvNqNYHIOW5v0m', 'FIND YOUR WAY BACK', '65kk9CAAqFI3LWBEhUwVqd'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'uncle scar (scar interlude)',
  '4KJ3NHQvLjA0hB2nv43cxD'],
 ['6vWDO969PvNqNYHIOW5v0m', "DON'T JEALOUS ME", '1yvFoBp2Bq1ilD0518ZpQx'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'danger (young simba & young nala interlude)',
  '6Y6BbZoYe2mNLceeoUZdoT'],
 ['6vWDO969PvNqNYHIOW5v0m', 'JA ARA E', '6pdip6qgVJOI5JxqgbAlu6'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'run away (scar & young simba interlude)',
  '19omXUq2TcREoUoHqbVLTq'],
 ['6vWDO969PvNqNYHIOW5v0m', 'NILE', '0lboDHM9hSR5j5CFlWDR9k'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'new lesson (timon, pumbaa & young simba interlude)',
  '3c3XyNDlGm6607OfqGyBHa'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'MOOD 4 EVA (feat. Oumou Sangaré

## Find paired dictionary key. Then append term and track_uri dictionary to key's list.

In [42]:
for search_term in search_term_list:
    master_dictionary[search_term[0]].append({'term':search_term[1].lower(),'track_uri':search_term[2]})

## Clean song column for search

In [43]:
initial_comp = df_comp_org[['info_artist_uri','song']]
initial_comp['clean_song'] = initial_comp['song'].apply(lambda x: x.replace('-',' '))
initial_comp = initial_comp.drop(columns='song')

org_comp = initial_comp.values.tolist()


In [44]:
org_comp

[['6vWDO969PvNqNYHIOW5v0m', 'ego remix'],
 ['6vWDO969PvNqNYHIOW5v0m', 'then tell me'],
 ['6vWDO969PvNqNYHIOW5v0m', 'honesty'],
 ['6vWDO969PvNqNYHIOW5v0m', 'you are my rock'],
 ['6vWDO969PvNqNYHIOW5v0m', 'black culture'],
 ['6vWDO969PvNqNYHIOW5v0m', 'all i could do was cry'],
 ['6vWDO969PvNqNYHIOW5v0m', 'once in a lifetime'],
 ['6vWDO969PvNqNYHIOW5v0m', 'waiting'],
 ['6vWDO969PvNqNYHIOW5v0m', 'slow love'],
 ['6vWDO969PvNqNYHIOW5v0m', 'why don t you love me'],
 ['6vWDO969PvNqNYHIOW5v0m', 'save the hero'],
 ['6vWDO969PvNqNYHIOW5v0m', 'telephone'],
 ['6vWDO969PvNqNYHIOW5v0m', 'ice cream truck'],
 ['6vWDO969PvNqNYHIOW5v0m', 'no broken hearted girl'],
 ['6vWDO969PvNqNYHIOW5v0m', 'control'],
 ['6vWDO969PvNqNYHIOW5v0m', 'i m alone now'],
 ['6vWDO969PvNqNYHIOW5v0m', 'poison'],
 ['6vWDO969PvNqNYHIOW5v0m', 'world wide women'],
 ['6vWDO969PvNqNYHIOW5v0m', 'beautiful liar'],
 ['6vWDO969PvNqNYHIOW5v0m', 'beautiful liar spanish'],
 ['6vWDO969PvNqNYHIOW5v0m', 'beautiful liar spanglish version'],
 ['6v

## Use Fuzzy Wuzzy to match term in org_comp to term in master dictionary

In [48]:
uri_match = []

start = time.time()

#Iterate over length of org_comp

for x in range(len(org_comp)):
    
    artist_uri = org_comp[x][0]
    
    try:
        
        find = master_dictionary[artist_uri]
        
        #Iternate over length of terms found in key = artist_uri
        
        for i in range(len(find)):
            
            match = fuzz.partial_ratio(org_comp[x][1],find[i]['term'])
            
            if match >= 90:
                
                uri_match.append({'term_org':org_comp[x][1],'term_comparison':find[i]['term'],
                                  'track_uri':master_dictionary[artist_uri][i]['track_uri'],
                                  'artist_uri':artist_uri,'Fuzzy Score':match})
                break
                
    except IndexError:
        continue

end = time.time()    
duration = (end-start)/60

## Example output

In [353]:
uri_match[0]

{'term_org': 'why don t you love me',
 'term_comparison': "why don't you love me",
 'track_uri': '5Ui8M6tfknhXo4MuGHt3Dy',
 'artist_uri': '6vWDO969PvNqNYHIOW5v0m',
 'Fuzzy Score': 95}

## Duration

In [354]:
f"{duration} minutes"

'4.775897300243377 minutes'

## Make info_artist_uri be artist_uri so future merge is clean

In [371]:
df_add = df_artist_uri[['info_artist_uri','info_artist_name']]
df_add = df_add.rename(columns={'info_artist_uri':'artist_uri'})

## Final DataFrame

In [373]:
df_successful_matches = pd.DataFrame(uri_match)
df_successful_matches = df_add.merge(df_successful_matches)
df_successful_matches.to_csv('Data/fuzzy_audiofeatures.csv',sep='|',index=False)
df_successful_matches

Unnamed: 0,artist_uri,info_artist_name,Fuzzy Score,term_comparison,term_org,track_uri
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,95,why don't you love me,why don t you love me,5Ui8M6tfknhXo4MuGHt3Dy
1,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,save the hero,save the hero,5dhPqcLr5EcSd7Fe4fslCq
2,6vWDO969PvNqNYHIOW5v0m,Beyoncé,95,broken-hearted girl,no broken hearted girl,5dWTQXVHdoIsSLpEyS3woy
3,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,lift ev'ry voice and sing - homecoming live,if,0QRxJvOohS8yiGC1n98uFM
4,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,no angel,angel,4DActPOAtak2m8meZeMt3B
5,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,mine (feat. drake),mine,63FrXif0Pdu4NAPvTh87mw
6,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,superpower (feat. frank ocean),superpower,49sXkAcR5LvOrtq5Qcn5cf
7,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,haunted,haunted,7cioKB5CHVzk09SOtTyn0T
8,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,flawless / feeling myself - homecoming live,flawless,6ma6Oe9PrzJsckdCebJoFM
9,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,partition,partition,5hgnY0mVcVetszbb85qeDg


In [290]:
df_lyrics_complete = df_successful_matches.merge(df_complete,left_on='term_org',right_on='term_org',how='inner')

In [291]:
#df_lyrics_complete

Unnamed: 0,Fuzzy Score,term_comparison,term_org,track_uri,song,org_artist_name,genre,lyrics,info_artist_uri,info_artist_name,info_followers_total,info_popularity,fuzzy_wuzzy_match
0,95,why don't you love me,why don t you love me,5Ui8M6tfknhXo4MuGHt3Dy,why-don-t-you-love-me,beyonce-knowles,Pop,"N-n-now, honey\nYou better sit down and look a...",6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86
1,95,why don't you love me,why don t you love me,5Ui8M6tfknhXo4MuGHt3Dy,why-don-t-you-love-me,amanda-marshall,Rock,Why am I lonely\nYou're sitting right here\nWh...,2ON3fLFbL1rHfHEjeYNKsO,Amanda Marshall,30257.0,42.0,93
2,95,why don't you love me,why don t you love me,5Ui8M6tfknhXo4MuGHt3Dy,why-don-t-you-love-me,alice-cooper,Rock,Why don't you love me?\nWhy don't you love me?...,3EhbVgyfGd7HkpsagwL9GS,Alice Cooper,1947328.0,67.0,92
3,95,why don't you love me,why don t you love me,5Ui8M6tfknhXo4MuGHt3Dy,why-don-t-you-love-me,don-gibson,Country,"Well, why don't you love me like you used to d...",4xcYVPssil6vbG6tq3W43S,Don Gibson,47833.0,48.0,90
4,95,why don't you love me?,why don t you love me,6bZFpe8vrIg7q5bejYbaXY,why-don-t-you-love-me,beyonce-knowles,Pop,"N-n-now, honey\nYou better sit down and look a...",6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86
5,95,why don't you love me?,why don t you love me,6bZFpe8vrIg7q5bejYbaXY,why-don-t-you-love-me,amanda-marshall,Rock,Why am I lonely\nYou're sitting right here\nWh...,2ON3fLFbL1rHfHEjeYNKsO,Amanda Marshall,30257.0,42.0,93
6,95,why don't you love me?,why don t you love me,6bZFpe8vrIg7q5bejYbaXY,why-don-t-you-love-me,alice-cooper,Rock,Why don't you love me?\nWhy don't you love me?...,3EhbVgyfGd7HkpsagwL9GS,Alice Cooper,1947328.0,67.0,92
7,95,why don't you love me?,why don t you love me,6bZFpe8vrIg7q5bejYbaXY,why-don-t-you-love-me,don-gibson,Country,"Well, why don't you love me like you used to d...",4xcYVPssil6vbG6tq3W43S,Don Gibson,47833.0,48.0,90
8,100,save the hero,save the hero,5dhPqcLr5EcSd7Fe4fslCq,save-the-hero,beyonce-knowles,Pop,I lay alone awake at night\nSorrow fills my ey...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86
9,95,broken-hearted girl,no broken hearted girl,5dWTQXVHdoIsSLpEyS3woy,no-broken-hearted-girl,beyonce-knowles,Pop,Youre everything I thought you never were\nAnd...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86
