In [1]:
import pandas as pd
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
import time

## Load Dataframes

In [2]:
df_artist_uri = pd.read_csv('Data/nlp_artist_uri.csv',sep='|')
df_artist_uri.head(5)

Unnamed: 0,org_artist_name,info_artist_uri,info_artist_name,info_followers_total,info_popularity
0,beyonce-knowles,6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0
1,asher-monroe,7nyLigsJ4LWLqVvMrSlQf7,Asher Monroe,53630.0,32.0
2,alice-on-the-roof,4M07FSqpxgqLfCOaX1WUei,Alice on the roof,22209.0,46.0
3,daliah-lavi,6az1ImuFcfXd1Rnzonmqau,Daliah Lavi,10575.0,37.0
4,borialis,2wXXMRH4VFDhS2fFE9swlz,Borialis,2645.0,28.0


In [3]:
df_lyrics = pd.read_csv('Data/raw_lyrics.csv',sep=',',index_col=False)
df_lyrics = df_lyrics.drop(columns='year')
#df_lyrics['clean_song'] = df_lyrics['song'].apply(lambda x: x.replace('-',' '))
df_lyrics.head(10)
#df_lyrics.dtypes

Unnamed: 0,index,song,org_artist_name,genre,lyrics
0,0,ego-remix,beyonce-knowles,Pop,"Oh baby, how you doing?\nYou know I'm gonna cu..."
1,1,then-tell-me,beyonce-knowles,Pop,"playin' everything so easy,\nit's like you see..."
2,2,honesty,beyonce-knowles,Pop,If you search\nFor tenderness\nIt isn't hard t...
3,3,you-are-my-rock,beyonce-knowles,Pop,"Oh oh oh I, oh oh oh I\n[Verse 1:]\nIf I wrote..."
4,4,black-culture,beyonce-knowles,Pop,"Party the people, the people the party it's po..."
5,5,all-i-could-do-was-cry,beyonce-knowles,Pop,I heard\nChurch bells ringing\nI heard\nA choi...
6,6,once-in-a-lifetime,beyonce-knowles,Pop,This is just another day that I would spend\nW...
7,7,waiting,beyonce-knowles,Pop,"Waiting, waiting, waiting, waiting\nWaiting, w..."
8,8,slow-love,beyonce-knowles,Pop,[Verse 1:]\nI read all of the magazines\nwhile...
9,9,why-don-t-you-love-me,beyonce-knowles,Pop,"N-n-now, honey\nYou better sit down and look a..."


In [4]:
df_track_uri = pd.read_csv('Data/artist_album_track_uri.csv',sep='|')
df_track_uri.head(5)

Unnamed: 0,album_uri,artist_name,artist_uri,track_name,track_uri
0,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,balance (mufasa interlude),1RMvRv1tAvWXgYdH8DUdLX
1,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,BIGGER,4R2KJOgEUP2qO3re9BwIhs
2,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,the stars (mufasa interlude),50QREAhIciZoAiRNtyELEh
3,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,FIND YOUR WAY BACK,65kk9CAAqFI3LWBEhUwVqd
4,552zi1M53PQAX5OH4FIdTx,Beyoncé,6vWDO969PvNqNYHIOW5v0m,uncle scar (scar interlude),4KJ3NHQvLjA0hB2nv43cxD


## Find Matches

In [5]:
df_org_artist_name = df_artist_uri['org_artist_name']

df_spotify_artist_name = df_artist_uri['info_artist_name'].str.lower()

master_match = []


for x in range(len(df_org_artist_name)):
    a = fuzz.partial_ratio(df_org_artist_name[x],df_spotify_artist_name[x])
    master_match.append(a)
    


## Create Fuzzy Wuzzy Match Column in artist_uri df

In [6]:
df_artist_uri['artist_fuzzy_score'] = master_match

## Only keep matches above 80

In [7]:
trim_df_artist_uri = df_artist_uri[df_artist_uri['artist_fuzzy_score']>=80]

## Merge datasets match on org_artist_name (info_artist_name) and lyrics.csv (artist)

In [8]:
df_complete = df_lyrics.merge(trim_df_artist_uri,left_on='org_artist_name',right_on='org_artist_name')
df_complete['term_org'] = df_complete['song'].apply(lambda x: x.replace('-',' '))
df_complete.sample(n=50)

Unnamed: 0,index,song,org_artist_name,genre,lyrics,info_artist_uri,info_artist_name,info_followers_total,info_popularity,artist_fuzzy_score,term_org
58231,78993,different-shades-of-blue,bo-bice,Rock,Sitting in the glare of the neon smoke\nAlways...,2Q1FIPavG8WZF33kqIP3sy,Bo Bice,11961.0,26.0,86,different shades of blue
37185,49853,dressed-in-blue,blue-system,Pop,You're so good - you're so bad\nYou're my chil...,7mDPp4RHlXLWkyLAf3AFBx,Blue System,40030.0,44.0,91,dressed in blue
112721,155889,cravings-of-the-heart,alove-for-enemies,Metal,How shallow have we become?\nWrapped up in poi...,6PIcFzdP8Jes6wLBNzKo3r,Alove For Enemies,1867.0,7.0,88,cravings of the heart
126608,175717,the-one,garbage,Rock,Yeah right from the start you had me hooked\nA...,6S0GHTqz5sxK5f9HtLXn9q,Garbage,707289.0,62.0,100,the one
91136,124997,sooner-or-later,fleetwood-mac,Rock,I thought I'd let you go\nIn my heart and in m...,08GQAI4eElDnROBrJRGE0X,Fleetwood Mac,4495235.0,82.0,92,sooner or later
210459,307117,country-comfort,american-idol,Pop,Soon the pines will be falling everywhere\nVil...,7xI9l2heJcFDlfzwUATUFr,American Idol Finalists - Season 4,972.0,5.0,92,country comfort
206439,299614,and-the-promise-of-the-truth,the-butterfly-effect,Rock,"Into the sea, the shining sea\nYou were taken ...",0kns2X0IFTXafgOGd2IlKJ,The Butterfly Effect,37775.0,42.0,90,and the promise of the truth
143851,202929,ain-t-broke-yet,cowboy-troy,Hip-Hop,Well he ain't broke yet but he's sho' nuff ben...,3Rd3kr9QoBdHGjD1Hk0tkj,Cowboy Troy,17411.0,40.0,91,ain t broke yet
226706,334479,spanish-moss,billy-cobham,Other,,0IwfuIL3gUJxjzUqY3wJ3j,Billy Cobham,66990.0,44.0,92,spanish moss
21796,29938,dreams-are-easy-to-come-by,bill-anderson,Country,,0gWNSMYCSHF4wxIs1XTSDh,Bill Anderson,35836.0,40.0,92,dreams are easy to come by


## Export Unique Artists for Get_AlbumURI.ipynb

In [9]:
trim_df_complete = df_complete[['index','info_artist_uri','info_artist_name']].drop_duplicates()
#trim_df_complete.to_csv('Data/artist_uri_fuzz.csv',sep='|',index=False)

## Org = df_lyrics columns - Use dataframe to search term in large album track uri

In [10]:
df_comp_org = df_complete[['index','info_artist_uri','song']]

## Create term column

In [11]:
df_comp_org['term'] = df_comp_org[['info_artist_uri', 'song']].apply(lambda x: ''.join(x), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [12]:
df_comp_org

Unnamed: 0,index,info_artist_uri,song,term
0,0,6vWDO969PvNqNYHIOW5v0m,ego-remix,6vWDO969PvNqNYHIOW5v0mego-remix
1,1,6vWDO969PvNqNYHIOW5v0m,then-tell-me,6vWDO969PvNqNYHIOW5v0mthen-tell-me
2,2,6vWDO969PvNqNYHIOW5v0m,honesty,6vWDO969PvNqNYHIOW5v0mhonesty
3,3,6vWDO969PvNqNYHIOW5v0m,you-are-my-rock,6vWDO969PvNqNYHIOW5v0myou-are-my-rock
4,4,6vWDO969PvNqNYHIOW5v0m,black-culture,6vWDO969PvNqNYHIOW5v0mblack-culture
5,5,6vWDO969PvNqNYHIOW5v0m,all-i-could-do-was-cry,6vWDO969PvNqNYHIOW5v0mall-i-could-do-was-cry
6,6,6vWDO969PvNqNYHIOW5v0m,once-in-a-lifetime,6vWDO969PvNqNYHIOW5v0monce-in-a-lifetime
7,7,6vWDO969PvNqNYHIOW5v0m,waiting,6vWDO969PvNqNYHIOW5v0mwaiting
8,8,6vWDO969PvNqNYHIOW5v0m,slow-love,6vWDO969PvNqNYHIOW5v0mslow-love
9,9,6vWDO969PvNqNYHIOW5v0m,why-don-t-you-love-me,6vWDO969PvNqNYHIOW5v0mwhy-don-t-you-love-me


## track_uri = df_artist_uri columns - Use dataframe to be paired with df_comp_org term

In [13]:
df_comp_track_uri = df_track_uri[['artist_uri','track_name','track_uri']]

In [14]:
df_comp_track_uri.head(10)

Unnamed: 0,artist_uri,track_name,track_uri
0,6vWDO969PvNqNYHIOW5v0m,balance (mufasa interlude),1RMvRv1tAvWXgYdH8DUdLX
1,6vWDO969PvNqNYHIOW5v0m,BIGGER,4R2KJOgEUP2qO3re9BwIhs
2,6vWDO969PvNqNYHIOW5v0m,the stars (mufasa interlude),50QREAhIciZoAiRNtyELEh
3,6vWDO969PvNqNYHIOW5v0m,FIND YOUR WAY BACK,65kk9CAAqFI3LWBEhUwVqd
4,6vWDO969PvNqNYHIOW5v0m,uncle scar (scar interlude),4KJ3NHQvLjA0hB2nv43cxD
5,6vWDO969PvNqNYHIOW5v0m,DON'T JEALOUS ME,1yvFoBp2Bq1ilD0518ZpQx
6,6vWDO969PvNqNYHIOW5v0m,danger (young simba & young nala interlude),6Y6BbZoYe2mNLceeoUZdoT
7,6vWDO969PvNqNYHIOW5v0m,JA ARA E,6pdip6qgVJOI5JxqgbAlu6
8,6vWDO969PvNqNYHIOW5v0m,run away (scar & young simba interlude),19omXUq2TcREoUoHqbVLTq
9,6vWDO969PvNqNYHIOW5v0m,NILE,0lboDHM9hSR5j5CFlWDR9k


## Create term column

In [15]:
df_comp_track_uri['term'] = df_comp_track_uri[['artist_uri', 'track_name']].apply(lambda x: ''.join(x), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [16]:
df_comp_track_uri.head(10)

Unnamed: 0,artist_uri,track_name,track_uri,term
0,6vWDO969PvNqNYHIOW5v0m,balance (mufasa interlude),1RMvRv1tAvWXgYdH8DUdLX,6vWDO969PvNqNYHIOW5v0mbalance (mufasa interlude)
1,6vWDO969PvNqNYHIOW5v0m,BIGGER,4R2KJOgEUP2qO3re9BwIhs,6vWDO969PvNqNYHIOW5v0mBIGGER
2,6vWDO969PvNqNYHIOW5v0m,the stars (mufasa interlude),50QREAhIciZoAiRNtyELEh,6vWDO969PvNqNYHIOW5v0mthe stars (mufasa interl...
3,6vWDO969PvNqNYHIOW5v0m,FIND YOUR WAY BACK,65kk9CAAqFI3LWBEhUwVqd,6vWDO969PvNqNYHIOW5v0mFIND YOUR WAY BACK
4,6vWDO969PvNqNYHIOW5v0m,uncle scar (scar interlude),4KJ3NHQvLjA0hB2nv43cxD,6vWDO969PvNqNYHIOW5v0muncle scar (scar interlude)
5,6vWDO969PvNqNYHIOW5v0m,DON'T JEALOUS ME,1yvFoBp2Bq1ilD0518ZpQx,6vWDO969PvNqNYHIOW5v0mDON'T JEALOUS ME
6,6vWDO969PvNqNYHIOW5v0m,danger (young simba & young nala interlude),6Y6BbZoYe2mNLceeoUZdoT,6vWDO969PvNqNYHIOW5v0mdanger (young simba & yo...
7,6vWDO969PvNqNYHIOW5v0m,JA ARA E,6pdip6qgVJOI5JxqgbAlu6,6vWDO969PvNqNYHIOW5v0mJA ARA E
8,6vWDO969PvNqNYHIOW5v0m,run away (scar & young simba interlude),19omXUq2TcREoUoHqbVLTq,6vWDO969PvNqNYHIOW5v0mrun away (scar & young s...
9,6vWDO969PvNqNYHIOW5v0m,NILE,0lboDHM9hSR5j5CFlWDR9k,6vWDO969PvNqNYHIOW5v0mNILE


## Create master dictionary to hold artist_uri, term, and track_uri from df_complete

In [17]:
master_dictionary = {}

keys = df_artist_uri['info_artist_uri']

for key in keys:
    master_dictionary.update({key:[]})

## Test Dictionary

In [18]:
master_dictionary

{'6vWDO969PvNqNYHIOW5v0m': [],
 '7nyLigsJ4LWLqVvMrSlQf7': [],
 '4M07FSqpxgqLfCOaX1WUei': [],
 '6az1ImuFcfXd1Rnzonmqau': [],
 '2wXXMRH4VFDhS2fFE9swlz': [],
 '0ug84nvWi4PxvGIL52EZWr': [],
 '0gt8ziBNDt2u3UCOPE5CNx': [],
 '1L7EOA3HMDy6lOdZKuwedm': [],
 '22ojy4H4ZVpowC4lRRC8In': [],
 '3VQfHuqrRK1CNXR1V8PeR9': [],
 '3Z8Ab6lOrrOzIekUwxLxI0': [],
 '7hZjPEqwCy3BUPaxebd57q': [],
 '3O9bFJBTRddC4HOsZixhuw': [],
 '49eo9xE4yGzJLWkOndiODs': [],
 '1rT2DYzj3q6QnrxaqF4jnN': [],
 '5fBimwbrIjCqNTTlCFHzon': [],
 '4TAV6oFZ5ARdlybbpFAFri': [],
 '4UvhuGdFf75Cfan7lEU1J1': [],
 '6jFcoeKxHPBbCIgFjWm6bc': [],
 '12JKvbMl7cQqmtumTZTT3g': [],
 '0hU5urLse5h1Z0b4zQkovL': [],
 '77OlE8SkEHDkJczfswzDFo': [],
 '3kjuyTCjPG1WMFCiyc5IuB': [],
 '450o9jw6AtiQlQkHCdH6Ru': [],
 '7dSnChJjb0jdfulJsIijoC': [],
 '4x7gxsrTH3gThvSKZPPwaQ': [],
 '4uIdP3jwyR0xifCS2FYS3o': [],
 '1C12JVV5gbeImz20zxX53l': [],
 '2ibMjULyZPLJFpid0NOAGt': [],
 '2BQ6kU5WrbK8qncGLWYOtB': [],
 '37zKpwMQ7NFzk0MPR4FUJe': [],
 '2ykUp1K8tRaOUFd8vvcwXl': [],
 '4w3TFO

## Create list of columns to be appended to master dictionary

In [19]:
append_to_master = df_comp_track_uri[['artist_uri','track_name','track_uri']]

In [20]:
search_term_list = append_to_master.values.tolist()
search_term_list

[['6vWDO969PvNqNYHIOW5v0m',
  'balance (mufasa interlude)',
  '1RMvRv1tAvWXgYdH8DUdLX'],
 ['6vWDO969PvNqNYHIOW5v0m', 'BIGGER', '4R2KJOgEUP2qO3re9BwIhs'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'the stars (mufasa interlude)',
  '50QREAhIciZoAiRNtyELEh'],
 ['6vWDO969PvNqNYHIOW5v0m', 'FIND YOUR WAY BACK', '65kk9CAAqFI3LWBEhUwVqd'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'uncle scar (scar interlude)',
  '4KJ3NHQvLjA0hB2nv43cxD'],
 ['6vWDO969PvNqNYHIOW5v0m', "DON'T JEALOUS ME", '1yvFoBp2Bq1ilD0518ZpQx'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'danger (young simba & young nala interlude)',
  '6Y6BbZoYe2mNLceeoUZdoT'],
 ['6vWDO969PvNqNYHIOW5v0m', 'JA ARA E', '6pdip6qgVJOI5JxqgbAlu6'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'run away (scar & young simba interlude)',
  '19omXUq2TcREoUoHqbVLTq'],
 ['6vWDO969PvNqNYHIOW5v0m', 'NILE', '0lboDHM9hSR5j5CFlWDR9k'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'new lesson (timon, pumbaa & young simba interlude)',
  '3c3XyNDlGm6607OfqGyBHa'],
 ['6vWDO969PvNqNYHIOW5v0m',
  'MOOD 4 EVA (feat. Oumou Sangaré

## Find paired dictionary key. Then append term and track_uri dictionary to key's list.

In [21]:
for search_term in search_term_list:
    master_dictionary[search_term[0]].append({'term':search_term[1].lower(),'track_uri':search_term[2]})

## Clean song column for search

In [22]:
initial_comp = df_comp_org[['index','info_artist_uri','song']]
initial_comp['clean_song'] = initial_comp['song'].apply(lambda x: x.replace('-',' '))
initial_comp = initial_comp.drop(columns='song')

org_comp = initial_comp.values.tolist()


In [23]:
org_comp

[[0, '6vWDO969PvNqNYHIOW5v0m', 'ego remix'],
 [1, '6vWDO969PvNqNYHIOW5v0m', 'then tell me'],
 [2, '6vWDO969PvNqNYHIOW5v0m', 'honesty'],
 [3, '6vWDO969PvNqNYHIOW5v0m', 'you are my rock'],
 [4, '6vWDO969PvNqNYHIOW5v0m', 'black culture'],
 [5, '6vWDO969PvNqNYHIOW5v0m', 'all i could do was cry'],
 [6, '6vWDO969PvNqNYHIOW5v0m', 'once in a lifetime'],
 [7, '6vWDO969PvNqNYHIOW5v0m', 'waiting'],
 [8, '6vWDO969PvNqNYHIOW5v0m', 'slow love'],
 [9, '6vWDO969PvNqNYHIOW5v0m', 'why don t you love me'],
 [10, '6vWDO969PvNqNYHIOW5v0m', 'save the hero'],
 [11, '6vWDO969PvNqNYHIOW5v0m', 'telephone'],
 [12, '6vWDO969PvNqNYHIOW5v0m', 'ice cream truck'],
 [13, '6vWDO969PvNqNYHIOW5v0m', 'no broken hearted girl'],
 [14, '6vWDO969PvNqNYHIOW5v0m', 'control'],
 [15, '6vWDO969PvNqNYHIOW5v0m', 'i m alone now'],
 [16, '6vWDO969PvNqNYHIOW5v0m', 'poison'],
 [17, '6vWDO969PvNqNYHIOW5v0m', 'world wide women'],
 [18, '6vWDO969PvNqNYHIOW5v0m', 'beautiful liar'],
 [19, '6vWDO969PvNqNYHIOW5v0m', 'beautiful liar spanish'],


## Use Fuzzy Wuzzy to match term in org_comp to term in master dictionary

In [24]:
uri_match = []

start = time.time()

#Iterate over length of org_comp

for x in range(len(org_comp)):
    
    artist_uri = org_comp[x][1]
    
    try:
        
        find = master_dictionary[artist_uri]
        
        #Iternate over length of terms found in key = artist_uri
        
        for i in range(len(find)):
            
            match = fuzz.partial_ratio(org_comp[x][2],find[i]['term'])
            
            if match >= 90:
                
                uri_match.append({'index':org_comp[x][0],'term_org':org_comp[x][2],'term_comparison':find[i]['term'],
                                  'track_uri':master_dictionary[artist_uri][i]['track_uri'],
                                  'artist_uri':artist_uri,'track_name_fuzzy_score':match})
                break
                
    except IndexError:
        continue

end = time.time()    
duration = (end-start)/60

## Example output

In [25]:
uri_match[0]

{'index': 9,
 'term_org': 'why don t you love me',
 'term_comparison': "why don't you love me",
 'track_uri': '5Ui8M6tfknhXo4MuGHt3Dy',
 'artist_uri': '6vWDO969PvNqNYHIOW5v0m',
 'track_name_fuzzy_score': 95}

## Duration

In [26]:
f"{duration} minutes"

'4.9999141971270245 minutes'

## Make info_artist_uri be artist_uri so future merge is clean

## Final DataFrame

In [27]:
df_add = df_artist_uri[['info_artist_uri','info_artist_name']]
df_add = df_add.rename(columns={'info_artist_uri':'artist_uri','info_artist_name':'artist_name'})
df_add.head()

Unnamed: 0,artist_uri,artist_name
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé
1,7nyLigsJ4LWLqVvMrSlQf7,Asher Monroe
2,4M07FSqpxgqLfCOaX1WUei,Alice on the roof
3,6az1ImuFcfXd1Rnzonmqau,Daliah Lavi
4,2wXXMRH4VFDhS2fFE9swlz,Borialis


In [28]:
df_successful_matches = pd.DataFrame(uri_match)
df_successful_matches = df_add.merge(df_successful_matches,left_on='artist_uri',right_on='artist_uri',how='right')
#df_successful_matches.to_csv('Data/fuzzy_audiofeatures.csv',sep='|',index=False)
df_successful_matches

Unnamed: 0,artist_uri,artist_name,index,term_comparison,term_org,track_name_fuzzy_score,track_uri
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,9,why don't you love me,why don t you love me,95,5Ui8M6tfknhXo4MuGHt3Dy
1,6vWDO969PvNqNYHIOW5v0m,Beyoncé,10,save the hero,save the hero,100,5dhPqcLr5EcSd7Fe4fslCq
2,6vWDO969PvNqNYHIOW5v0m,Beyoncé,13,broken-hearted girl,no broken hearted girl,95,5dWTQXVHdoIsSLpEyS3woy
3,6vWDO969PvNqNYHIOW5v0m,Beyoncé,29,lift ev'ry voice and sing - homecoming live,if,100,0QRxJvOohS8yiGC1n98uFM
4,6vWDO969PvNqNYHIOW5v0m,Beyoncé,42,no angel,angel,100,4DActPOAtak2m8meZeMt3B
5,6vWDO969PvNqNYHIOW5v0m,Beyoncé,56,mine (feat. drake),mine,100,63FrXif0Pdu4NAPvTh87mw
6,6vWDO969PvNqNYHIOW5v0m,Beyoncé,57,superpower (feat. frank ocean),superpower,100,49sXkAcR5LvOrtq5Qcn5cf
7,6vWDO969PvNqNYHIOW5v0m,Beyoncé,58,haunted,haunted,100,7cioKB5CHVzk09SOtTyn0T
8,6vWDO969PvNqNYHIOW5v0m,Beyoncé,59,flawless / feeling myself - homecoming live,flawless,100,6ma6Oe9PrzJsckdCebJoFM
9,6vWDO969PvNqNYHIOW5v0m,Beyoncé,60,partition,partition,100,5hgnY0mVcVetszbb85qeDg


In [29]:
df_lyrics_complete = df_successful_matches.merge(df_complete,left_on='index',right_on='index',how='left')

In [30]:
df_lyrics_complete.head()

Unnamed: 0,artist_uri,artist_name,index,term_comparison,term_org_x,track_name_fuzzy_score,track_uri,song,org_artist_name,genre,lyrics,info_artist_uri,info_artist_name,info_followers_total,info_popularity,artist_fuzzy_score,term_org_y
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,9,why don't you love me,why don t you love me,95,5Ui8M6tfknhXo4MuGHt3Dy,why-don-t-you-love-me,beyonce-knowles,Pop,"N-n-now, honey\nYou better sit down and look a...",6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86,why don t you love me
1,6vWDO969PvNqNYHIOW5v0m,Beyoncé,10,save the hero,save the hero,100,5dhPqcLr5EcSd7Fe4fslCq,save-the-hero,beyonce-knowles,Pop,I lay alone awake at night\nSorrow fills my ey...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86,save the hero
2,6vWDO969PvNqNYHIOW5v0m,Beyoncé,13,broken-hearted girl,no broken hearted girl,95,5dWTQXVHdoIsSLpEyS3woy,no-broken-hearted-girl,beyonce-knowles,Pop,Youre everything I thought you never were\nAnd...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86,no broken hearted girl
3,6vWDO969PvNqNYHIOW5v0m,Beyoncé,29,lift ev'ry voice and sing - homecoming live,if,100,0QRxJvOohS8yiGC1n98uFM,if,beyonce-knowles,Pop,He is always laughin' and flirting with me\nAn...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86,if
4,6vWDO969PvNqNYHIOW5v0m,Beyoncé,42,no angel,angel,100,4DActPOAtak2m8meZeMt3B,angel,beyonce-knowles,Pop,"This is for my fans\n(Uhu, uhu)\nThis is for m...",6vWDO969PvNqNYHIOW5v0m,Beyoncé,22659644.0,88.0,86,angel


In [42]:
df_nlp_data = df_lyrics_complete.rename(columns={'term_comparison':'track_name'})
df_nlp_data = df_nlp_data[['artist_uri','artist_name','track_uri','track_name','lyrics']]
df_nlp_data

Unnamed: 0,artist_uri,artist_name,track_uri,track_name,lyrics
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5Ui8M6tfknhXo4MuGHt3Dy,why don't you love me,"N-n-now, honey\nYou better sit down and look a..."
1,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5dhPqcLr5EcSd7Fe4fslCq,save the hero,I lay alone awake at night\nSorrow fills my ey...
2,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5dWTQXVHdoIsSLpEyS3woy,broken-hearted girl,Youre everything I thought you never were\nAnd...
3,6vWDO969PvNqNYHIOW5v0m,Beyoncé,0QRxJvOohS8yiGC1n98uFM,lift ev'ry voice and sing - homecoming live,He is always laughin' and flirting with me\nAn...
4,6vWDO969PvNqNYHIOW5v0m,Beyoncé,4DActPOAtak2m8meZeMt3B,no angel,"This is for my fans\n(Uhu, uhu)\nThis is for m..."
5,6vWDO969PvNqNYHIOW5v0m,Beyoncé,63FrXif0Pdu4NAPvTh87mw,mine (feat. drake),[Verse 1: Beyonce]\nI've been watching for the...
6,6vWDO969PvNqNYHIOW5v0m,Beyoncé,49sXkAcR5LvOrtq5Qcn5cf,superpower (feat. frank ocean),[Verse 1]\nWhen the palm of my two hands hold ...
7,6vWDO969PvNqNYHIOW5v0m,Beyoncé,7cioKB5CHVzk09SOtTyn0T,haunted,[Intro: Presenter]\nThe winner is\nBeyonce Kno...
8,6vWDO969PvNqNYHIOW5v0m,Beyoncé,6ma6Oe9PrzJsckdCebJoFM,flawless / feeling myself - homecoming live,[Intro]\nYour challengers are a young group fr...
9,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5hgnY0mVcVetszbb85qeDg,partition,"Part 1: ""YoncÃ©""\n[Intro]\nLet me hear you say..."


In [32]:
#df_nlp_data.to_csv('Data/master_nlp_lyrics.csv',sep='|',index=False)

In [39]:
df_audio_features = pd.read_csv('Data/master_nlp_audio_features.csv',sep='|')

In [40]:
df_audio_features

Unnamed: 0,artist_uri,info_artist_name,Fuzzy Score,term_comparison,term_org,track_uri,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,95,why don't you love me,why don t you love me,5Ui8M6tfknhXo4MuGHt3Dy,0.046300,0.693,0.730,0.000002,6.0,0.0582,-4.571,1.0,0.0856,136.882,4.0,0.7410
1,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,save the hero,save the hero,5dhPqcLr5EcSd7Fe4fslCq,0.673000,0.551,0.467,0.000000,10.0,0.0762,-10.364,1.0,0.0332,116.966,4.0,0.1480
2,6vWDO969PvNqNYHIOW5v0m,Beyoncé,95,broken-hearted girl,no broken hearted girl,5dWTQXVHdoIsSLpEyS3woy,0.503000,0.336,0.424,0.000000,5.0,0.2730,-7.203,1.0,0.0353,82.149,4.0,0.1590
3,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,lift ev'ry voice and sing - homecoming live,if,0QRxJvOohS8yiGC1n98uFM,0.124000,0.127,0.409,0.000002,2.0,0.6830,-13.089,1.0,0.0417,79.904,3.0,0.0782
4,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,no angel,angel,4DActPOAtak2m8meZeMt3B,0.042400,0.571,0.466,0.002620,9.0,0.1250,-9.153,0.0,0.1730,111.580,1.0,0.5120
5,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,mine (feat. drake),mine,63FrXif0Pdu4NAPvTh87mw,0.067300,0.557,0.428,0.001900,5.0,0.1390,-11.299,1.0,0.1370,103.009,4.0,0.0996
6,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,superpower (feat. frank ocean),superpower,49sXkAcR5LvOrtq5Qcn5cf,0.643000,0.527,0.334,0.000000,0.0,0.1140,-11.540,1.0,0.0681,80.334,3.0,0.1860
7,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,haunted,haunted,7cioKB5CHVzk09SOtTyn0T,0.103000,0.436,0.534,0.006430,1.0,0.5070,-9.416,0.0,0.0773,122.822,4.0,0.3250
8,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,flawless / feeling myself - homecoming live,flawless,6ma6Oe9PrzJsckdCebJoFM,0.079000,0.587,0.764,0.000002,8.0,0.7760,-9.322,1.0,0.2150,137.013,4.0,0.4460
9,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,partition,partition,5hgnY0mVcVetszbb85qeDg,0.029600,0.412,0.441,0.072600,11.0,0.3060,-11.523,0.0,0.2910,185.571,4.0,0.1740


In [73]:
master_lyrics_audio_features = df_nlp_data.merge(df_audio_features,left_on = 'track_uri',right_on='track_uri',how='left')

In [74]:
master_lyrics_audio_features

Unnamed: 0,artist_uri_x,artist_name,track_uri,track_name,lyrics,artist_uri_y,info_artist_name,Fuzzy Score,term_comparison,term_org,...,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5Ui8M6tfknhXo4MuGHt3Dy,why don't you love me,"N-n-now, honey\nYou better sit down and look a...",6vWDO969PvNqNYHIOW5v0m,Beyoncé,95,why don't you love me,why don t you love me,...,0.730,0.000002,6.0,0.0582,-4.571,1.0,0.0856,136.882,4.0,0.7410
1,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5dhPqcLr5EcSd7Fe4fslCq,save the hero,I lay alone awake at night\nSorrow fills my ey...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,save the hero,save the hero,...,0.467,0.000000,10.0,0.0762,-10.364,1.0,0.0332,116.966,4.0,0.1480
2,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5dWTQXVHdoIsSLpEyS3woy,broken-hearted girl,Youre everything I thought you never were\nAnd...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,95,broken-hearted girl,no broken hearted girl,...,0.424,0.000000,5.0,0.2730,-7.203,1.0,0.0353,82.149,4.0,0.1590
3,6vWDO969PvNqNYHIOW5v0m,Beyoncé,0QRxJvOohS8yiGC1n98uFM,lift ev'ry voice and sing - homecoming live,He is always laughin' and flirting with me\nAn...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,lift ev'ry voice and sing - homecoming live,if,...,0.409,0.000002,2.0,0.6830,-13.089,1.0,0.0417,79.904,3.0,0.0782
4,6vWDO969PvNqNYHIOW5v0m,Beyoncé,4DActPOAtak2m8meZeMt3B,no angel,"This is for my fans\n(Uhu, uhu)\nThis is for m...",6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,no angel,angel,...,0.466,0.002620,9.0,0.1250,-9.153,0.0,0.1730,111.580,1.0,0.5120
5,6vWDO969PvNqNYHIOW5v0m,Beyoncé,4DActPOAtak2m8meZeMt3B,no angel,"This is for my fans\n(Uhu, uhu)\nThis is for m...",6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,no angel,no angel,...,0.466,0.002620,9.0,0.1250,-9.153,0.0,0.1730,111.580,1.0,0.5120
6,6vWDO969PvNqNYHIOW5v0m,Beyoncé,63FrXif0Pdu4NAPvTh87mw,mine (feat. drake),[Verse 1: Beyonce]\nI've been watching for the...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,mine (feat. drake),mine,...,0.428,0.001900,5.0,0.1390,-11.299,1.0,0.1370,103.009,4.0,0.0996
7,6vWDO969PvNqNYHIOW5v0m,Beyoncé,49sXkAcR5LvOrtq5Qcn5cf,superpower (feat. frank ocean),[Verse 1]\nWhen the palm of my two hands hold ...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,superpower (feat. frank ocean),superpower,...,0.334,0.000000,0.0,0.1140,-11.540,1.0,0.0681,80.334,3.0,0.1860
8,6vWDO969PvNqNYHIOW5v0m,Beyoncé,7cioKB5CHVzk09SOtTyn0T,haunted,[Intro: Presenter]\nThe winner is\nBeyonce Kno...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,haunted,haunted,...,0.534,0.006430,1.0,0.5070,-9.416,0.0,0.0773,122.822,4.0,0.3250
9,6vWDO969PvNqNYHIOW5v0m,Beyoncé,7cioKB5CHVzk09SOtTyn0T,haunted,[Intro: Presenter]\nThe winner is\nBeyonce Kno...,6vWDO969PvNqNYHIOW5v0m,Beyoncé,100,haunted,haunted michael diamond remix,...,0.534,0.006430,1.0,0.5070,-9.416,0.0,0.0773,122.822,4.0,0.3250


In [75]:
master_lyrics_audio_features = master_lyrics_audio_features.drop(columns=['artist_uri_y','info_artist_name',
                                                                         'Fuzzy Score','term_comparison',
                                                                          'term_org',])

In [76]:
master_lyrics_audio_features = master_lyrics_audio_features.rename({'artist_uri_x':'artist_uri'})

In [78]:
master_lyrics_audio_features.head()

Unnamed: 0,artist_uri_x,artist_name,track_uri,track_name,lyrics,acousticness,danceability,energy,instrumentalness,key,liveness,loudness,mode,speechiness,tempo,time_signature,valence
0,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5Ui8M6tfknhXo4MuGHt3Dy,why don't you love me,"N-n-now, honey\nYou better sit down and look a...",0.0463,0.693,0.73,2e-06,6.0,0.0582,-4.571,1.0,0.0856,136.882,4.0,0.741
1,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5dhPqcLr5EcSd7Fe4fslCq,save the hero,I lay alone awake at night\nSorrow fills my ey...,0.673,0.551,0.467,0.0,10.0,0.0762,-10.364,1.0,0.0332,116.966,4.0,0.148
2,6vWDO969PvNqNYHIOW5v0m,Beyoncé,5dWTQXVHdoIsSLpEyS3woy,broken-hearted girl,Youre everything I thought you never were\nAnd...,0.503,0.336,0.424,0.0,5.0,0.273,-7.203,1.0,0.0353,82.149,4.0,0.159
3,6vWDO969PvNqNYHIOW5v0m,Beyoncé,0QRxJvOohS8yiGC1n98uFM,lift ev'ry voice and sing - homecoming live,He is always laughin' and flirting with me\nAn...,0.124,0.127,0.409,2e-06,2.0,0.683,-13.089,1.0,0.0417,79.904,3.0,0.0782
4,6vWDO969PvNqNYHIOW5v0m,Beyoncé,4DActPOAtak2m8meZeMt3B,no angel,"This is for my fans\n(Uhu, uhu)\nThis is for m...",0.0424,0.571,0.466,0.00262,9.0,0.125,-9.153,0.0,0.173,111.58,1.0,0.512


In [79]:
master_lyrics_audio_features.to_csv('Data/master_lyrics_audio_features.csv',sep='|',index=False)