In [4]:
import pandas as pd
import numpy as np

In [138]:
# Open the Excel workbook so its sheets can be inspected before any data is loaded
vocab = pd.ExcelFile(r'Hip Hop _ Vocab Chart.xlsx')

In [139]:
# List the workbook's sheet names to find out which sheet holds the data needed
vocab.sheet_names

['Artist Album Links', 'Sheet8', 'Full']

In [140]:
# Load the 'Full' sheet into a DataFrame to look at the data.
# Read it from the workbook already opened as `vocab` instead of re-opening the
# file by path: the file is parsed once and the filename is written in one place.
fullsheet = pd.read_excel(vocab, sheet_name='Full')

In [141]:
# Number of rows in the full sheet: 186, i.e. 186 artists if each row is one artist
fullsheet.shape[0]

186

In [142]:
# Preview the first five rows to see which columns the sheet contains
fullsheet.head(n=5)

Unnamed: 0,notes,recalc,era,rapper,rapper_clean,color,words,id,source
0,,2988.0,2010s,21 Savage,21 Savage,,2988,21-savage,new
1,,3812.0,2010s,2 Chainz,2 Chainz,,4148,2chainz,poster
2,,3815.0,1990s,2Pac,2Pac,west nowutang second,3970,2pac,site
3,,3384.0,2000s,50 Cent,50 Cent,east nowutang first,3591,50-cent,site
4,,4534.0,2010s,Ab-Soul,Ab-Soul,,4534,absoul,new


In [143]:
# Keep only the columns the analysis relies on: artist id, artist name, era and the 'recalc' count
improvedfullsheet = fullsheet.loc[:, ['id', 'rapper', 'era', 'recalc']]

In [144]:
# Remove every row that has a missing value in any of the kept columns
improvedfullsheet = improvedfullsheet.dropna(axis=0, how='any')

In [176]:
# Row count after dropping missing values: 160 remain, so 26 of the 186 artists were dropped
improvedfullsheet.shape[0]

160

In [145]:
# Rename the 'recalc' column to 'words', since it appears to hold the actual word count
improvedfullsheet = improvedfullsheet.rename({'recalc': 'words'}, axis='columns')

In [150]:
# Order the artists from the highest word count to the lowest
improvedfullsheetsort = improvedfullsheet.sort_values('words', ascending=False)

In [178]:
# The top 50 Hip Hop artists in this dataset by number of unique words used in their music
# Fun fact: it takes until number 21 (Watsky) for an artist from the 2010s to appear in the ranking
# Fun fact 2: Jean Grae is the top female Hip Hop artist, at number 14
improvedfullsheetsort.iloc[:50]

Unnamed: 0,id,rapper,era,words
6,aesop,Aesop Rock,2000s,7879.0
24,busdriver,Busdriver,2000s,7324.0
68,jedi,Jedi Mind Tricks,2000s,6424.0
57,gza,GZA (only solo albums),1990s,6390.0
156,wu-tang,Wu-Tang Clan,1990s,6196.0
106,mf-doom,MF DOOM,2000s,6169.0
131,rza,RZA (only solo albums),2000s,6018.0
61,immortal,Immortal Technique,2000s,5930.0
27,canibus,Canibus,2000s,5915.0
54,ghostface-killah,Ghostface Killah (only solo albums),2000s,5901.0


In [155]:
# The 50 Hip Hop artists with the fewest unique words (last 50 rows of the descending sort)
improvedfullsheetsort.iloc[-50:]

Unnamed: 0,id,rapper,era,words
137,snoop,Snoop Dogg,1990s,3797.0
48,foxy-brown,Foxy Brown,1990s,3782.0
77,kanye-west,Kanye West,2000s,3760.0
17,bob,BoB,2010s,3748.0
108,missy-elliot,Missy Elliot,2000s,3747.0
101,machine,Machine Gun Kelly,2010s,3690.0
87,lecrae,LeCrae,2000s,3687.0
28,childish,Childish Gambino,2010s,3682.0
71,juelz,Juelz Santana,2000s,3675.0
45,eve,Eve,2000s,3642.0


In [179]:
# Summary statistics for the whole cleaned table: 160 artists averaging about 4263 words each
improvedfullsheetsort.describe()

Unnamed: 0,words
count,160.0
mean,4263.2875
std,927.617293
min,2472.0
25%,3635.5
50%,4204.0
75%,4691.0
max,7879.0


In [180]:
# Build a table holding only the artists whose era is the 1980s
improved1980s = improvedfullsheet.loc[improvedfullsheet['era'].eq("1980s")]

In [181]:
# Order the 1980s artists from the highest unique word count to the lowest
improved1980s = improved1980s.sort_values("words", ascending=False)

In [182]:
# 1980s Hip Hop artists ranked by unique word count, highest first
improved1980s

Unnamed: 0,id,rapper,era,words
11,big-daddy-kane,Big Daddy Kane,1980s,4655.0
95,ll-cool-j,LL Cool J,1980s,4638.0
10,beastie-boys,Beastie Boys,1980s,4632.0
15,biz-markie,Biz Markie,1980s,4423.0
60,ice-t,Ice T,1980s,4334.0
121,rakim,Rakim (including Eric B. & Rakim),1980s,4301.0
129,run-dmc,Run-D.M.C.,1980s,3899.0
143,too-short,Too Short,1980s,2778.0


In [183]:
# Summary statistics for the 8 artists of the 1980s: they average about 4207 words,
# slightly below the overall average of about 4263
improved1980s.describe()

Unnamed: 0,words
count,8.0
mean,4207.5
std,629.695391
min,2778.0
25%,4200.5
50%,4378.5
75%,4633.5
max,4655.0


In [184]:
# Build a table holding only the artists whose era is the 1990s
improved1990s = improvedfullsheet.loc[improvedfullsheet['era'].eq("1990s")]

In [185]:
# Order the 1990s artists from the highest unique word count to the lowest
improved1990s = improved1990s.sort_values("words", ascending=False)

In [186]:
# 1990s Hip Hop artists ranked by unique word count, highest first
improved1990s

Unnamed: 0,id,rapper,era,words
57,gza,GZA (only solo albums),1990s,6390.0
156,wu-tang,Wu-Tang Clan,1990s,6196.0
38,del,Del the Funky Homosapian,1990s,5845.0
127,roots,The Roots,1990s,5781.0
85,kool-keith,Kool Keith,1990s,5566.0
84,kool-g-rap,Kool G Rap,1990s,5554.0
120,raekwon,Raekwon (only solo albums),1990s,5428.0
122,redman,Redman,1990s,5196.0
35,das-efx,Das EFX,1990s,5178.0
30,common,Common,1990s,5138.0


In [187]:
# Summary statistics for the 1990s artists: they average about 4544 words, roughly 340 more
# per artist than the 1980s average (about 4207), and the group has 44 artists versus 8
improved1990s.describe()

Unnamed: 0,words
count,44.0
mean,4544.704545
std,779.2852
min,2936.0
25%,3904.25
50%,4558.0
75%,4986.0
max,6390.0


In [188]:
# Build a table holding only the artists whose era is the 2000s
improved2000s = improvedfullsheet.loc[improvedfullsheet['era'].eq("2000s")]

In [189]:
# Order the 2000s artists from the highest unique word count to the lowest
improved2000s = improved2000s.sort_values('words', ascending=False)

In [190]:
# 2000s Hip Hop artists ranked by unique word count, highest first
improved2000s

Unnamed: 0,id,rapper,era,words
6,aesop,Aesop Rock,2000s,7879.0
24,busdriver,Busdriver,2000s,7324.0
68,jedi,Jedi Mind Tricks,2000s,6424.0
106,mf-doom,MF DOOM,2000s,6169.0
131,rza,RZA (only solo albums),2000s,6018.0
61,immortal,Immortal Technique,2000s,5930.0
27,canibus,Canibus,2000s,5915.0
54,ghostface-killah,Ghostface Killah (only solo albums),2000s,5901.0
16,blackalicious,Blackalicious,2000s,5741.0
67,jean,Jean Grae,2000s,5738.0


In [191]:
# Summary statistics for the 2000s artists: they average about 4633 words per artist, slightly
# more than the 1990s average (about 4544), with 50 artists versus 44
improved2000s.describe()

Unnamed: 0,words
count,50.0
mean,4633.58
std,1045.522096
min,3275.0
25%,3866.0
50%,4399.0
75%,5314.25
max,7879.0


In [192]:
# Build a table holding only the artists whose era is the 2010s
improved2010s = improvedfullsheet.loc[improvedfullsheet['era'].eq("2010s")]

In [193]:
# Order the 2010s artists from the highest unique word count to the lowest
improved2010s = improved2010s.sort_values("words", ascending=False)

In [194]:
# 2010s Hip Hop artists ranked by unique word count, highest first
improved2010s

Unnamed: 0,id,rapper,era,words
154,watsky,Watsky,2010s,5413.0
5,action,Action Bronson,2010s,5233.0
75,kaan,KAAN,2010s,5107.0
47,flatbush,Flatbush Zombies,2010s,4879.0
70,joey,Joey BadA$$,2010s,4871.0
126,rittz,Rittz,2010s,4757.0
4,absoul,Ab-Soul,2010s,4534.0
150,tyler,Tyler the Creator,2010s,4431.0
39,denzel,Denzel Curry,2010s,4426.0
7,asap,A$AP Rocky,2010s,4414.0


In [195]:
# Summary statistics for the 2010s artists: they average about 3722 words per artist. This is the
# largest group (57 artists), yet its average is roughly 485 to 910 words lower than the averages
# of the earlier eras (about 4207, 4544 and 4633).
# This could be because artists from earlier eras have had more time rapping, or because artists
# are simply using fewer unique words in their songs than before.
improved2010s.describe()

Unnamed: 0,words
count,57.0
mean,3722.807018
std,697.193134
min,2472.0
25%,3197.0
50%,3690.0
75%,4163.0
max,5413.0
