# Creating a DTM

In [18]:
import pandas as pd
from datetime import datetime
import spacy

In [14]:
# Read the raw lyrics table from disk; the relative path is resolved against
# the notebook's working directory.
LYRICS_CSV = "lyrics.csv"
first_df = pd.read_csv(LYRICS_CSV)

In [15]:
# Relabel the column named "index" as "song_id" so it can act as the song
# identifier. Note this renames a *column*, not the DataFrame's row index.
# The result is reassigned instead of using inplace=True, so the frame object
# produced by the load cell is not mutated behind the reader's back; rename
# skips labels that are absent, so re-running this cell is harmless.
first_df = first_df.rename(columns={"index": "song_id"})

In [16]:
# Separate the song metadata from the lyric text. song_id appears in both
# resulting frames.
metadata_columns = ["song_id", "song", "year", "artist", "genre"]
lyric_columns = ["song_id", "lyrics"]

info = first_df[metadata_columns]
lyrics = first_df[lyric_columns]

# Print the column labels of each piece to confirm the split.
for frame in (info, lyrics):
    print(frame.columns)

Index(['song_id', 'song', 'year', 'artist', 'genre'], dtype='object')
Index(['song_id', 'lyrics'], dtype='object')


In [19]:
# Load the spaCy pipeline named below and bind it to `nlp`, the callable used
# to process text in the following cell.
SPACY_MODEL = "en_core_web_md"
nlp = spacy.load(SPACY_MODEL)

In [28]:
# Worked example: run the pipeline over the lyrics stored under row label 0.
doc = nlp(lyrics["lyrics"][0])

section_break = "###################################################"

# Per-token annotations, in print order: text, lemma, coarse part of speech,
# fine-grained tag, dependency label, shape, alphabetic flag, stop-word flag.
for token in doc:
    annotations = (
        token.text, token.lemma_, token.pos_, token.tag_,
        token.dep_, token.shape_, token.is_alpha, token.is_stop,
    )
    print(*annotations)

print(section_break)

# Tokenization only: one token text per line.
for token in doc:
    print(token.text)

print(section_break)

# Named entities found in the document, with character offsets and label.
for ent in doc.ents:
    entity_fields = (ent.text, ent.start_char, ent.end_char, ent.label_)
    print(*entity_fields)

print(section_break)

# Word-vector coverage per token: has-vector flag, vector norm, and the
# out-of-vocabulary flag.
for token in doc:
    vector_fields = (token.text, token.has_vector, token.vector_norm, token.is_oov)
    print(*vector_fields)

Oh oh INTJ UH intj Xx True False
baby baby NOUN NN npadvmod xxxx True False
, , PUNCT , punct , False False
how how ADV WRB advmod xxx True True
you -PRON- PRON PRP nsubj xxx True True
doing do VERB VBG ROOT xxxx True True
? ? PUNCT . punct ? False False

 
 SPACE _SP  
 False False
You -PRON- PRON PRP nsubj Xxx True True
know know VERB VBP parataxis xxxx True False
I -PRON- PRON PRP nsubj X True True
'm be AUX VBP aux 'x False True
gon go VERB VBG ROOT xxx True False
na to PART TO aux xx True False
cut cut VERB VB xcomp xxx True False
right right ADV RB dobj xxxx True False
to to ADP IN prep xx True True
the the DET DT det xxx True True
chase chase NOUN NN pobj xxxx True False

 
 SPACE _SP  
 False False
Some some DET DT det Xxxx True True
women woman NOUN NNS nsubjpass xxxx True False
were be AUX VBD auxpass xxxx True True
made make VERB VBN ROOT xxxx True True
but but CCONJ CC cc xxx True True
me -PRON- PRON PRP conj xx True True
, , PUNCT , punct , False False
myself -PRON- PRON P

, , PUNCT , punct , False False
I -PRON- PRON PRP nsubj X True True
call call VERB VBP parataxis xxxx True True
it -PRON- PRON PRP dobj xx True True
confident confident ADJ JJ oprd xxxx True False

 
 SPACE _SP  
 False False
You -PRON- PRON PRP nsubj Xxx True True
decide decide VERB VBP ROOT xxxx True False
when when ADV WRB advmod xxxx True True
you -PRON- PRON PRP nsubj xxx True True
find find VERB VBP advcl xxxx True False
on on ADP IN prep xx True True
what what PRON WP pobj xxxx True True
I -PRON- PRON PRP nsubj X True True
'm be AUX VBP aux 'x False True
working work VERB VBG pcomp xxxx True False
with with ADP IN prep xxxx True True

 
 SPACE _SP  
 False False
Damn damn INTJ UH ROOT Xxxx True False
I -PRON- PRON PRP nsubj X True True
know know VERB VBP parataxis xxxx True False
I -PRON- PRON PRP nsubj X True True
'm be AUX VBP aux 'x False True
killing kill VERB VBG ROOT xxxx True False
you -PRON- PRON PRP dobj xxx True True
with with ADP IN prep xxxx True True
them -PRON- PRO


It
's
too
much
,
it
's
too
tough


I
talk
like
this
'cause
I
can
back
it
up


I
got
a
big
ego
,
such
a
huge
ego


But
he
love
my
big
ego
,
it
's
too
much


I
walk
like
this
'cause
I
can
back
it
up


I
,
I
walk
like
this
'cause
I
can
back
it
up


I
,
I
talk
like
this
'cause
I
can
back
it
up


I
,
I
can
back
it
up
,
I
can
back
it
up


I
walk
like
this
'cause
I
can
back
it
up


It
's
too
big
,
it
's
too
wide


It
's
too
strong
,
it
wo
n't
fit


It
's
too
much
,
it
's
too
tough


He
talk
like
this
'cause
he
can
back
it
up


He
got
a
big
ego
,
such
a
huge
ego
,
such
a
huge
ego


I
love
his
big
ego
,
it
's
too
much


He
walk
like
this
'cause
he
can
back
it
up


Ego
so
big
,
you
must
admit


I
got
every
reason
to
feel
like
I
'm
that
bitch


Ego
so
strong
,
if
you
ai
n't
know


I
do
n't
need
no
beat
,
I
can
sing
it
with
piano
###################################################
###################################################
Oh True 6.044095 False
baby True 6.911526 False
, True 5.094723 F

can True 5.132161 False
back True 5.1515756 False
it True 4.9409766 False
up True 4.6057925 False

 False 0.0 False
I True 6.4231944 False
got True 5.3200083 False
a True 5.306696 False
big True 5.7743006 False
ego True 6.463813 False
, True 5.094723 False
such True 5.4577436 False
a True 5.306696 False
huge True 5.932221 False
ego True 6.463813 False

 False 0.0 False
But True 4.903002 False
he True 6.080851 False
love True 6.04035 False
my True 5.837562 False
big True 5.7743006 False
ego True 6.463813 False
, True 5.094723 False
it True 4.9409766 False
's True 5.1889863 False
too True 5.173234 False
much True 5.084145 False

 False 0.0 False
I True 6.4231944 False
walk True 6.524474 False
like True 4.78322 False
this True 5.0461264 False
'cause True 5.8536057 False
I True 6.4231944 False
can True 5.132161 False
back True 5.1515756 False
it True 4.9409766 False
up True 4.6057925 False

 False 0.0 False
I True 6.4231944 False
, True 5.094723 False
I True 6.4231944 False
walk True 6.524